llm-wiki-compiler 0.5.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -15
- package/dist/cli.js +2602 -455
- package/dist/cli.js.map +1 -1
- package/dist/viewer/assets/index.html +71 -0
- package/dist/viewer/assets/llmwiki-logo-64.png +0 -0
- package/dist/viewer/assets/viewer-rail.js +181 -0
- package/dist/viewer/assets/viewer-search.js +185 -0
- package/dist/viewer/assets/viewer-sidebar.js +151 -0
- package/dist/viewer/assets/viewer.css +314 -0
- package/dist/viewer/assets/viewer.js +363 -0
- package/package.json +7 -1
package/dist/cli.js
CHANGED
|
@@ -2,17 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
// src/cli.ts
|
|
4
4
|
import "dotenv/config";
|
|
5
|
-
import { createRequire } from "module";
|
|
5
|
+
import { createRequire as createRequire2 } from "module";
|
|
6
6
|
import { Command } from "commander";
|
|
7
7
|
|
|
8
8
|
// src/commands/ingest.ts
|
|
9
|
-
import
|
|
10
|
-
import {
|
|
9
|
+
import path8 from "path";
|
|
10
|
+
import { readFile as readFile7 } from "fs/promises";
|
|
11
11
|
|
|
12
12
|
// src/utils/markdown.ts
|
|
13
13
|
import { writeFile, rename, readFile, mkdir } from "fs/promises";
|
|
14
14
|
import path from "path";
|
|
15
15
|
import yaml from "js-yaml";
|
|
16
|
+
var CITATION_MARKER_PATTERN = /\^\[([^\]]+)\]/g;
|
|
16
17
|
var SPAN_SUFFIX_PATTERN = /^(?<file>[^:#]+)(?:(?::(?<colonStart>\d+)(?:-(?<colonEnd>\d+))?)|(?:#L(?<hashStart>\d+)(?:-L(?<hashEnd>\d+))?))?$/;
|
|
17
18
|
var MIN_LINE_NUMBER = 1;
|
|
18
19
|
var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
|
|
@@ -22,7 +23,7 @@ var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
|
|
|
22
23
|
"ambiguous"
|
|
23
24
|
]);
|
|
24
25
|
function slugify(title) {
|
|
25
|
-
return title.toLowerCase().replace(/['']/g, "").replace(/[^\
|
|
26
|
+
return title.toLowerCase().replace(/['']/g, "").replace(/[^\p{L}\p{N}\s-]/gu, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
26
27
|
}
|
|
27
28
|
function buildFrontmatter(fields) {
|
|
28
29
|
const dumped = yaml.dump(fields, { lineWidth: -1, quotingType: '"' }).trimEnd();
|
|
@@ -31,19 +32,27 @@ ${dumped}
|
|
|
31
32
|
---`;
|
|
32
33
|
}
|
|
33
34
|
function parseFrontmatter(content) {
|
|
35
|
+
const { meta, body } = parseFrontmatterStatus(content);
|
|
36
|
+
return { meta, body };
|
|
37
|
+
}
|
|
38
|
+
function parseFrontmatterStatus(content) {
|
|
34
39
|
const match = content.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
|
|
35
40
|
if (!match) {
|
|
36
|
-
return { meta: {}, body: content };
|
|
41
|
+
return { meta: {}, body: content, hasFrontmatterBlock: false, malformedFrontmatter: false };
|
|
37
42
|
}
|
|
38
43
|
let meta = {};
|
|
44
|
+
let malformedFrontmatter = false;
|
|
39
45
|
try {
|
|
40
46
|
const parsed = yaml.load(match[1]);
|
|
41
47
|
if (parsed && typeof parsed === "object") {
|
|
42
48
|
meta = parsed;
|
|
49
|
+
} else if (parsed !== null && parsed !== void 0) {
|
|
50
|
+
malformedFrontmatter = true;
|
|
43
51
|
}
|
|
44
52
|
} catch {
|
|
53
|
+
malformedFrontmatter = true;
|
|
45
54
|
}
|
|
46
|
-
return { meta, body: match[2] };
|
|
55
|
+
return { meta, body: match[2], hasFrontmatterBlock: true, malformedFrontmatter };
|
|
47
56
|
}
|
|
48
57
|
async function atomicWrite(filePath, content) {
|
|
49
58
|
await mkdir(path.dirname(filePath), { recursive: true });
|
|
@@ -51,6 +60,41 @@ async function atomicWrite(filePath, content) {
|
|
|
51
60
|
await writeFile(tmpPath, content, "utf-8");
|
|
52
61
|
await rename(tmpPath, filePath);
|
|
53
62
|
}
|
|
63
|
+
function extractClaimCitations(body) {
|
|
64
|
+
const citations = [];
|
|
65
|
+
let match;
|
|
66
|
+
CITATION_MARKER_PATTERN.lastIndex = 0;
|
|
67
|
+
while ((match = CITATION_MARKER_PATTERN.exec(body)) !== null) {
|
|
68
|
+
const raw = match[1];
|
|
69
|
+
const spans = parseCitationEntries(raw);
|
|
70
|
+
if (spans.length > 0) citations.push({ raw, spans });
|
|
71
|
+
}
|
|
72
|
+
return citations;
|
|
73
|
+
}
|
|
74
|
+
function parseCitationEntries(inner) {
|
|
75
|
+
const spans = [];
|
|
76
|
+
for (const part of inner.split(",")) {
|
|
77
|
+
const trimmed = part.trim();
|
|
78
|
+
if (trimmed.length === 0) continue;
|
|
79
|
+
const span = parseSpanEntry(trimmed);
|
|
80
|
+
if (span !== void 0) spans.push(span);
|
|
81
|
+
}
|
|
82
|
+
return spans;
|
|
83
|
+
}
|
|
84
|
+
function parseSpanEntry(entry) {
|
|
85
|
+
const match = SPAN_SUFFIX_PATTERN.exec(entry);
|
|
86
|
+
if (!match || !match.groups) {
|
|
87
|
+
return { file: entry };
|
|
88
|
+
}
|
|
89
|
+
const { file, colonStart, colonEnd, hashStart, hashEnd } = match.groups;
|
|
90
|
+
const start = colonStart ?? hashStart;
|
|
91
|
+
const end = colonEnd ?? hashEnd;
|
|
92
|
+
if (start === void 0) return { file };
|
|
93
|
+
const startLine = Number(start);
|
|
94
|
+
const endLine = end === void 0 ? startLine : Number(end);
|
|
95
|
+
if (!isValidLineRange(startLine, endLine)) return void 0;
|
|
96
|
+
return { file, lines: { start: startLine, end: endLine } };
|
|
97
|
+
}
|
|
54
98
|
function isValidLineRange(start, end) {
|
|
55
99
|
return start >= MIN_LINE_NUMBER && end >= start;
|
|
56
100
|
}
|
|
@@ -103,16 +147,11 @@ function parseContradictedBy(raw) {
|
|
|
103
147
|
const refs = raw.map(coerceContradictionEntry).filter((ref) => ref !== null);
|
|
104
148
|
return refs.length > 0 ? refs : void 0;
|
|
105
149
|
}
|
|
106
|
-
function parseInferredParagraphs(raw) {
|
|
107
|
-
if (typeof raw !== "number" || !Number.isInteger(raw) || raw < 0) return void 0;
|
|
108
|
-
return raw;
|
|
109
|
-
}
|
|
110
150
|
function parseProvenanceMetadata(meta) {
|
|
111
151
|
return {
|
|
112
152
|
confidence: parseConfidence(meta.confidence),
|
|
113
153
|
provenanceState: parseProvenanceState(meta.provenanceState),
|
|
114
|
-
contradictedBy: parseContradictedBy(meta.contradictedBy)
|
|
115
|
-
inferredParagraphs: parseInferredParagraphs(meta.inferredParagraphs)
|
|
154
|
+
contradictedBy: parseContradictedBy(meta.contradictedBy)
|
|
116
155
|
};
|
|
117
156
|
}
|
|
118
157
|
function validateWikiPage(content) {
|
|
@@ -123,9 +162,16 @@ function validateWikiPage(content) {
|
|
|
123
162
|
return true;
|
|
124
163
|
}
|
|
125
164
|
|
|
165
|
+
// src/utils/source-writer.ts
|
|
166
|
+
import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
|
|
167
|
+
import path2 from "path";
|
|
168
|
+
import { createHash } from "crypto";
|
|
169
|
+
|
|
126
170
|
// src/utils/constants.ts
|
|
127
171
|
var MAX_SOURCE_CHARS = 1e5;
|
|
128
172
|
var MIN_SOURCE_CHARS = 50;
|
|
173
|
+
var DEFAULT_PROMPT_BUDGET_CHARS = 2e5;
|
|
174
|
+
var PROMPT_BUDGET_ENV_VAR = "LLMWIKI_PROMPT_BUDGET_CHARS";
|
|
129
175
|
var QUERY_PAGE_LIMIT = 5;
|
|
130
176
|
var COMPILE_CONCURRENCY = 5;
|
|
131
177
|
var RETRY_COUNT = 3;
|
|
@@ -136,9 +182,11 @@ var PROVIDER_MODELS = {
|
|
|
136
182
|
anthropic: "claude-sonnet-4-20250514",
|
|
137
183
|
openai: "gpt-4o",
|
|
138
184
|
ollama: "llama3.1",
|
|
139
|
-
minimax: "MiniMax-M2.7"
|
|
185
|
+
minimax: "MiniMax-M2.7",
|
|
186
|
+
copilot: "gpt-4o"
|
|
140
187
|
};
|
|
141
188
|
var OLLAMA_DEFAULT_HOST = "http://localhost:11434/v1";
|
|
189
|
+
var COPILOT_BASE_URL = "https://api.githubcopilot.com";
|
|
142
190
|
var OPENAI_DEFAULT_TIMEOUT_MS = 10 * 60 * 1e3;
|
|
143
191
|
var OLLAMA_DEFAULT_TIMEOUT_MS = 30 * 60 * 1e3;
|
|
144
192
|
var SOURCES_DIR = "sources";
|
|
@@ -150,6 +198,7 @@ var LOCK_FILE = ".llmwiki/lock";
|
|
|
150
198
|
var INDEX_FILE = "wiki/index.md";
|
|
151
199
|
var MOC_FILE = "wiki/MOC.md";
|
|
152
200
|
var EMBEDDINGS_FILE = ".llmwiki/embeddings.json";
|
|
201
|
+
var LAST_LINT_FILE = ".llmwiki/last-lint.json";
|
|
153
202
|
var IMAGE_EXTENSIONS = /* @__PURE__ */ new Set([".jpg", ".jpeg", ".png", ".gif", ".webp"]);
|
|
154
203
|
var TRANSCRIPT_EXTENSIONS = /* @__PURE__ */ new Set([".vtt", ".srt"]);
|
|
155
204
|
var IMAGE_DESCRIBE_MAX_TOKENS = 2048;
|
|
@@ -169,6 +218,42 @@ var EMBEDDING_MODELS = {
|
|
|
169
218
|
ollama: "nomic-embed-text"
|
|
170
219
|
};
|
|
171
220
|
|
|
221
|
+
// src/utils/source-writer.ts
|
|
222
|
+
var COLLISION_HASH_LEN = 8;
|
|
223
|
+
function shortHashOfSource(source2) {
|
|
224
|
+
return createHash("sha256").update(source2).digest("hex").slice(0, COLLISION_HASH_LEN);
|
|
225
|
+
}
|
|
226
|
+
async function resolveCollisionFreeFilename(slug, source2) {
|
|
227
|
+
const candidate = `${slug}.md`;
|
|
228
|
+
const candidatePath2 = path2.join(SOURCES_DIR, candidate);
|
|
229
|
+
let existing;
|
|
230
|
+
try {
|
|
231
|
+
existing = await readFile2(candidatePath2, "utf-8");
|
|
232
|
+
} catch (err) {
|
|
233
|
+
const e = err;
|
|
234
|
+
if (e.code === "ENOENT") return candidate;
|
|
235
|
+
throw err;
|
|
236
|
+
}
|
|
237
|
+
const { meta } = parseFrontmatter(existing);
|
|
238
|
+
if (typeof meta.source === "string" && meta.source === source2) {
|
|
239
|
+
return candidate;
|
|
240
|
+
}
|
|
241
|
+
return `${slug}-${shortHashOfSource(source2)}.md`;
|
|
242
|
+
}
|
|
243
|
+
async function saveSource(title, document, source2) {
|
|
244
|
+
const slug = slugify(title);
|
|
245
|
+
if (!slug) {
|
|
246
|
+
throw new Error(
|
|
247
|
+
`Could not derive a filename from title "${title}". The title contains no letter or number characters. Rename the source file to one with at least one letter or digit.`
|
|
248
|
+
);
|
|
249
|
+
}
|
|
250
|
+
await mkdir2(SOURCES_DIR, { recursive: true });
|
|
251
|
+
const filename = await resolveCollisionFreeFilename(slug, source2);
|
|
252
|
+
const destPath = path2.join(SOURCES_DIR, filename);
|
|
253
|
+
await writeFile2(destPath, document, "utf-8");
|
|
254
|
+
return destPath;
|
|
255
|
+
}
|
|
256
|
+
|
|
172
257
|
// src/utils/output.ts
|
|
173
258
|
var RESET = "\x1B[0m";
|
|
174
259
|
var BOLD = "\x1B[1m";
|
|
@@ -244,13 +329,13 @@ async function ingestWeb(url) {
|
|
|
244
329
|
}
|
|
245
330
|
|
|
246
331
|
// src/ingest/file.ts
|
|
247
|
-
import { readFile as
|
|
248
|
-
import
|
|
332
|
+
import { readFile as readFile3 } from "fs/promises";
|
|
333
|
+
import path4 from "path";
|
|
249
334
|
|
|
250
335
|
// src/ingest/shared.ts
|
|
251
|
-
import
|
|
336
|
+
import path3 from "path";
|
|
252
337
|
function titleFromFilename(filePath) {
|
|
253
|
-
const basename =
|
|
338
|
+
const basename = path3.basename(filePath, path3.extname(filePath));
|
|
254
339
|
return basename.replace(/[-_]+/g, " ").trim();
|
|
255
340
|
}
|
|
256
341
|
|
|
@@ -262,20 +347,20 @@ ${text}
|
|
|
262
347
|
\`\`\``;
|
|
263
348
|
}
|
|
264
349
|
async function ingestFile(filePath) {
|
|
265
|
-
const ext =
|
|
350
|
+
const ext = path4.extname(filePath).toLowerCase();
|
|
266
351
|
if (!SUPPORTED_EXTENSIONS.has(ext)) {
|
|
267
352
|
throw new Error(
|
|
268
353
|
`Unsupported file type "${ext}". Only .md and .txt files are supported.`
|
|
269
354
|
);
|
|
270
355
|
}
|
|
271
|
-
const raw = await
|
|
356
|
+
const raw = await readFile3(filePath, "utf-8");
|
|
272
357
|
const title = titleFromFilename(filePath);
|
|
273
358
|
const content = ext === ".md" ? raw : wrapPlainText(raw);
|
|
274
359
|
return { title, content };
|
|
275
360
|
}
|
|
276
361
|
|
|
277
362
|
// src/ingest/pdf.ts
|
|
278
|
-
import { readFile as
|
|
363
|
+
import { readFile as readFile4 } from "fs/promises";
|
|
279
364
|
function resolveTitle(filePath, info2) {
|
|
280
365
|
if (info2 && typeof info2 === "object") {
|
|
281
366
|
const titleField = info2["Title"];
|
|
@@ -287,7 +372,7 @@ function resolveTitle(filePath, info2) {
|
|
|
287
372
|
}
|
|
288
373
|
async function ingestPdf(filePath) {
|
|
289
374
|
const { PDFParse } = await import("pdf-parse");
|
|
290
|
-
const buffer = await
|
|
375
|
+
const buffer = await readFile4(filePath);
|
|
291
376
|
const parser = new PDFParse({ data: new Uint8Array(buffer) });
|
|
292
377
|
try {
|
|
293
378
|
const textResult = await parser.getText();
|
|
@@ -301,8 +386,8 @@ async function ingestPdf(filePath) {
|
|
|
301
386
|
}
|
|
302
387
|
|
|
303
388
|
// src/ingest/image.ts
|
|
304
|
-
import { readFile as
|
|
305
|
-
import
|
|
389
|
+
import { readFile as readFile5 } from "fs/promises";
|
|
390
|
+
import path6 from "path";
|
|
306
391
|
import Anthropic2 from "@anthropic-ai/sdk";
|
|
307
392
|
|
|
308
393
|
// src/providers/anthropic.ts
|
|
@@ -419,7 +504,7 @@ var AnthropicProvider = class {
|
|
|
419
504
|
// src/utils/claude-settings.ts
|
|
420
505
|
import { readFileSync } from "fs";
|
|
421
506
|
import { homedir } from "os";
|
|
422
|
-
import
|
|
507
|
+
import path5 from "path";
|
|
423
508
|
var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
|
|
424
509
|
function isRecord(value) {
|
|
425
510
|
return typeof value === "object" && value !== null;
|
|
@@ -430,7 +515,7 @@ function normalize(value) {
|
|
|
430
515
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
431
516
|
}
|
|
432
517
|
function resolveClaudeSettingsPath(env) {
|
|
433
|
-
return env[CLAUDE_SETTINGS_PATH_ENV] ??
|
|
518
|
+
return env[CLAUDE_SETTINGS_PATH_ENV] ?? path5.join(homedir(), ".claude", "settings.json");
|
|
434
519
|
}
|
|
435
520
|
function readClaudeSettingsFile(settingsPath) {
|
|
436
521
|
try {
|
|
@@ -563,9 +648,9 @@ async function ingestImage(filePath) {
|
|
|
563
648
|
`Image ingest requires the Anthropic provider (vision). Current provider: "${providerName}". Set LLMWIKI_PROVIDER=anthropic and ANTHROPIC_API_KEY to use image ingest.`
|
|
564
649
|
);
|
|
565
650
|
}
|
|
566
|
-
const ext =
|
|
651
|
+
const ext = path6.extname(filePath).toLowerCase();
|
|
567
652
|
const mimeType = mimeTypeForExtension(ext);
|
|
568
|
-
const imageBuffer = await
|
|
653
|
+
const imageBuffer = await readFile5(filePath);
|
|
569
654
|
const imageData = imageBuffer.toString("base64");
|
|
570
655
|
const client = buildClient();
|
|
571
656
|
const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
|
|
@@ -575,8 +660,8 @@ async function ingestImage(filePath) {
|
|
|
575
660
|
}
|
|
576
661
|
|
|
577
662
|
// src/ingest/transcript.ts
|
|
578
|
-
import { readFile as
|
|
579
|
-
import
|
|
663
|
+
import { readFile as readFile6 } from "fs/promises";
|
|
664
|
+
import path7 from "path";
|
|
580
665
|
import { YoutubeTranscript } from "youtube-transcript";
|
|
581
666
|
var YOUTUBE_URL_PATTERN = /^https?:\/\/(www\.)?(youtube\.com\/watch|youtu\.be\/)/;
|
|
582
667
|
var SRT_SEQUENCE_PATTERN = /^\d+$/;
|
|
@@ -661,8 +746,8 @@ async function ingestTranscript(source2) {
|
|
|
661
746
|
if (isYoutubeUrl(source2)) {
|
|
662
747
|
return fetchYoutubeTranscript(source2);
|
|
663
748
|
}
|
|
664
|
-
const ext =
|
|
665
|
-
const raw = await
|
|
749
|
+
const ext = path7.extname(source2).toLowerCase();
|
|
750
|
+
const raw = await readFile6(source2, "utf-8");
|
|
666
751
|
if (ext === ".vtt") return parseVtt(raw, source2);
|
|
667
752
|
if (ext === ".srt") return parseSrt(raw, source2);
|
|
668
753
|
if (ext === ".txt") return parsePlainTranscript(raw, source2);
|
|
@@ -701,7 +786,7 @@ function hasSpeakerDialoguePattern(sample) {
|
|
|
701
786
|
return hasEnoughSpeakers && hasRepeatedSpeaker;
|
|
702
787
|
}
|
|
703
788
|
async function looksLikeTxtTranscript(filePath) {
|
|
704
|
-
const raw = await
|
|
789
|
+
const raw = await readFile7(filePath, "utf-8");
|
|
705
790
|
const sample = raw.slice(0, TXT_SNIFF_BYTES);
|
|
706
791
|
if (hasSpeakerDialoguePattern(sample)) return true;
|
|
707
792
|
const timestampMatches = sample.match(new RegExp(TIMESTAMP_PATTERN2.source, "gm"));
|
|
@@ -723,124 +808,1571 @@ function enforceCharLimit(content) {
|
|
|
723
808
|
originalChars: content.length
|
|
724
809
|
};
|
|
725
810
|
}
|
|
726
|
-
function enforceMinContent(content) {
|
|
727
|
-
const length = content.trim().length;
|
|
728
|
-
if (length === 0) {
|
|
729
|
-
throw new Error(
|
|
730
|
-
"No readable content could be extracted from the source."
|
|
731
|
-
);
|
|
732
|
-
}
|
|
733
|
-
if (length < MIN_SOURCE_CHARS) {
|
|
734
|
-
status(
|
|
735
|
-
"!",
|
|
736
|
-
warn(
|
|
737
|
-
`Content seems very short (${length} chars, minimum recommended is ${MIN_SOURCE_CHARS}).`
|
|
738
|
-
)
|
|
739
|
-
);
|
|
811
|
+
function enforceMinContent(content) {
|
|
812
|
+
const length = content.trim().length;
|
|
813
|
+
if (length === 0) {
|
|
814
|
+
throw new Error(
|
|
815
|
+
"No readable content could be extracted from the source."
|
|
816
|
+
);
|
|
817
|
+
}
|
|
818
|
+
if (length < MIN_SOURCE_CHARS) {
|
|
819
|
+
status(
|
|
820
|
+
"!",
|
|
821
|
+
warn(
|
|
822
|
+
`Content seems very short (${length} chars, minimum recommended is ${MIN_SOURCE_CHARS}).`
|
|
823
|
+
)
|
|
824
|
+
);
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
async function detectSourceType(source2) {
|
|
828
|
+
if (!isUrl(source2)) {
|
|
829
|
+
const ext = path8.extname(source2).toLowerCase();
|
|
830
|
+
if (ext === ".pdf") return "pdf";
|
|
831
|
+
if (IMAGE_EXTENSIONS.has(ext)) return "image";
|
|
832
|
+
if (TRANSCRIPT_EXTENSIONS.has(ext)) return "transcript";
|
|
833
|
+
if (ext === ".txt") {
|
|
834
|
+
const isTranscript = await looksLikeTxtTranscript(source2);
|
|
835
|
+
return isTranscript ? "transcript" : "file";
|
|
836
|
+
}
|
|
837
|
+
return "file";
|
|
838
|
+
}
|
|
839
|
+
if (isYoutubeUrl(source2)) return "transcript";
|
|
840
|
+
return "web";
|
|
841
|
+
}
|
|
842
|
+
function buildDocument(title, source2, result, sourceType) {
|
|
843
|
+
const meta = {
|
|
844
|
+
title,
|
|
845
|
+
source: source2,
|
|
846
|
+
ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
847
|
+
};
|
|
848
|
+
if (sourceType !== void 0) {
|
|
849
|
+
meta.sourceType = sourceType;
|
|
850
|
+
}
|
|
851
|
+
if (result.truncated) {
|
|
852
|
+
meta.truncated = true;
|
|
853
|
+
meta.originalChars = result.originalChars;
|
|
854
|
+
}
|
|
855
|
+
const frontmatter = buildFrontmatter(meta);
|
|
856
|
+
return `${frontmatter}
|
|
857
|
+
|
|
858
|
+
${result.content}
|
|
859
|
+
`;
|
|
860
|
+
}
|
|
861
|
+
async function fetchContent(source2, sourceType) {
|
|
862
|
+
switch (sourceType) {
|
|
863
|
+
case "web":
|
|
864
|
+
return ingestWeb(source2);
|
|
865
|
+
case "pdf":
|
|
866
|
+
return ingestPdf(source2);
|
|
867
|
+
case "image":
|
|
868
|
+
return ingestImage(source2);
|
|
869
|
+
case "transcript":
|
|
870
|
+
return ingestTranscript(source2);
|
|
871
|
+
case "file":
|
|
872
|
+
return ingestFile(source2);
|
|
873
|
+
}
|
|
874
|
+
}
|
|
875
|
+
async function ingestSource(source2) {
|
|
876
|
+
const sourceType = await detectSourceType(source2);
|
|
877
|
+
status("*", info(`Ingesting [${sourceType}]: ${source2}`));
|
|
878
|
+
const { title, content } = await fetchContent(source2, sourceType);
|
|
879
|
+
const result = enforceCharLimit(content);
|
|
880
|
+
enforceMinContent(result.content);
|
|
881
|
+
const document = buildDocument(title, source2, result, sourceType);
|
|
882
|
+
const savedPath = await saveSource(title, document, source2);
|
|
883
|
+
return {
|
|
884
|
+
filename: path8.basename(savedPath),
|
|
885
|
+
charCount: result.content.length,
|
|
886
|
+
truncated: result.truncated,
|
|
887
|
+
source: source2,
|
|
888
|
+
sourceType
|
|
889
|
+
};
|
|
890
|
+
}
|
|
891
|
+
async function ingest(source2) {
|
|
892
|
+
const result = await ingestSource(source2);
|
|
893
|
+
const savedPath = path8.join(SOURCES_DIR, result.filename);
|
|
894
|
+
status(
|
|
895
|
+
"+",
|
|
896
|
+
success(`Saved ${bold(result.filename)} \u2192 ${source(savedPath)}`)
|
|
897
|
+
);
|
|
898
|
+
status("\u2192", dim("Next: llmwiki compile"));
|
|
899
|
+
}
|
|
900
|
+
|
|
901
|
+
// src/commands/ingest-session.ts
|
|
902
|
+
import path12 from "path";
|
|
903
|
+
import { readdir, stat } from "fs/promises";
|
|
904
|
+
|
|
905
|
+
// src/adapters/claude.ts
|
|
906
|
+
import { readFile as readFile8 } from "fs/promises";
|
|
907
|
+
import path9 from "path";
|
|
908
|
+
|
|
909
|
+
// src/adapters/utils.ts
|
|
910
|
+
var MAX_TITLE_CHARS = 80;
|
|
911
|
+
function truncateTitle(text) {
|
|
912
|
+
const trimmed = text.trim();
|
|
913
|
+
return trimmed.length > MAX_TITLE_CHARS ? trimmed.slice(0, MAX_TITLE_CHARS).trimEnd() + "\u2026" : trimmed;
|
|
914
|
+
}
|
|
915
|
+
function resolveSessionTitle(rawTitle, firstUserContent, defaultTitle) {
|
|
916
|
+
if (rawTitle && rawTitle.trim().length > 0) return truncateTitle(rawTitle);
|
|
917
|
+
if (firstUserContent) {
|
|
918
|
+
const firstLine = firstUserContent.split("\n")[0];
|
|
919
|
+
if (firstLine.trim().length > 0) return truncateTitle(firstLine);
|
|
920
|
+
}
|
|
921
|
+
return defaultTitle;
|
|
922
|
+
}
|
|
923
|
+
function parseJsonOrThrow(raw, filePath) {
|
|
924
|
+
try {
|
|
925
|
+
return JSON.parse(raw);
|
|
926
|
+
} catch {
|
|
927
|
+
throw new Error(`Invalid JSON in session file: ${filePath}`);
|
|
928
|
+
}
|
|
929
|
+
}
|
|
930
|
+
|
|
931
|
+
// src/adapters/claude.ts
|
|
932
|
+
var CLAUDE_EXTENSION = ".jsonl";
|
|
933
|
+
var CLAUDE_TYPE_MARKERS = /* @__PURE__ */ new Set(["user", "assistant", "system", "tool_use", "tool_result"]);
|
|
934
|
+
function extractText(content) {
|
|
935
|
+
if (typeof content === "string") return content;
|
|
936
|
+
return content.filter((b) => b.type === "text" && typeof b.text === "string").map((b) => b.text).join("\n");
|
|
937
|
+
}
|
|
938
|
+
function titleFromFirstUserMessage(turns) {
|
|
939
|
+
const firstUser = turns.find((t) => t.role === "user" && t.content.trim().length > 0);
|
|
940
|
+
return resolveSessionTitle(void 0, firstUser?.content, "Claude Session");
|
|
941
|
+
}
|
|
942
|
+
function parseLine(line) {
|
|
943
|
+
try {
|
|
944
|
+
return JSON.parse(line);
|
|
945
|
+
} catch {
|
|
946
|
+
return null;
|
|
947
|
+
}
|
|
948
|
+
}
|
|
949
|
+
function eventToTurn(event) {
|
|
950
|
+
if (!event.message || !event.message.role) return null;
|
|
951
|
+
const role = event.message.role;
|
|
952
|
+
if (role !== "user" && role !== "assistant") return null;
|
|
953
|
+
const content = extractText(event.message.content);
|
|
954
|
+
if (content.trim().length === 0) return null;
|
|
955
|
+
return { role, content, timestamp: event.timestamp };
|
|
956
|
+
}
|
|
957
|
+
var claudeAdapter = {
|
|
958
|
+
name: "claude",
|
|
959
|
+
async detect(filePath) {
|
|
960
|
+
if (path9.extname(filePath).toLowerCase() !== CLAUDE_EXTENSION) return false;
|
|
961
|
+
const raw = await readFile8(filePath, "utf-8").catch(() => "");
|
|
962
|
+
const firstLine = raw.split("\n")[0].trim();
|
|
963
|
+
if (!firstLine.startsWith("{")) return false;
|
|
964
|
+
try {
|
|
965
|
+
const obj = JSON.parse(firstLine);
|
|
966
|
+
return typeof obj.type === "string" && CLAUDE_TYPE_MARKERS.has(obj.type);
|
|
967
|
+
} catch {
|
|
968
|
+
return false;
|
|
969
|
+
}
|
|
970
|
+
},
|
|
971
|
+
async parse(filePath) {
|
|
972
|
+
const raw = await readFile8(filePath, "utf-8");
|
|
973
|
+
const lines = raw.split("\n").filter((l) => l.trim().length > 0);
|
|
974
|
+
if (lines.length === 0) {
|
|
975
|
+
throw new Error(`Claude session file is empty: ${filePath}`);
|
|
976
|
+
}
|
|
977
|
+
const turns = [];
|
|
978
|
+
const timestamps = [];
|
|
979
|
+
for (const [index, line] of lines.entries()) {
|
|
980
|
+
const event = parseLine(line);
|
|
981
|
+
if (event === null) {
|
|
982
|
+
throw new Error(
|
|
983
|
+
`Malformed JSON on line ${index + 1} of Claude session: ${filePath}`
|
|
984
|
+
);
|
|
985
|
+
}
|
|
986
|
+
if (event.timestamp) timestamps.push(event.timestamp);
|
|
987
|
+
const turn = eventToTurn(event);
|
|
988
|
+
if (turn) turns.push(turn);
|
|
989
|
+
}
|
|
990
|
+
const title = titleFromFirstUserMessage(turns);
|
|
991
|
+
return {
|
|
992
|
+
title,
|
|
993
|
+
adapter: "claude",
|
|
994
|
+
startedAt: timestamps[0],
|
|
995
|
+
endedAt: timestamps[timestamps.length - 1],
|
|
996
|
+
participantIdentity: "Claude Code",
|
|
997
|
+
turns
|
|
998
|
+
};
|
|
999
|
+
}
|
|
1000
|
+
};
|
|
1001
|
+
|
|
1002
|
+
// src/adapters/codex.ts
|
|
1003
|
+
import { readFile as readFile9 } from "fs/promises";
|
|
1004
|
+
import path10 from "path";
|
|
1005
|
+
var CODEX_EXTENSION = ".json";
|
|
1006
|
+
function unixToIso(ts) {
|
|
1007
|
+
return new Date(ts * 1e3).toISOString();
|
|
1008
|
+
}
|
|
1009
|
+
function extractTurns(mapping) {
|
|
1010
|
+
const turns = [];
|
|
1011
|
+
for (const node of Object.values(mapping)) {
|
|
1012
|
+
const msg = node.message;
|
|
1013
|
+
if (!msg) continue;
|
|
1014
|
+
const role = msg.author?.role;
|
|
1015
|
+
if (role !== "user" && role !== "assistant") continue;
|
|
1016
|
+
const content = (msg.content?.parts ?? []).join("\n").trim();
|
|
1017
|
+
if (content.length === 0) continue;
|
|
1018
|
+
turns.push({
|
|
1019
|
+
role,
|
|
1020
|
+
content,
|
|
1021
|
+
timestamp: msg.create_time != null ? unixToIso(msg.create_time) : void 0
|
|
1022
|
+
});
|
|
1023
|
+
}
|
|
1024
|
+
turns.sort((a, b) => {
|
|
1025
|
+
if (!a.timestamp || !b.timestamp) return 0;
|
|
1026
|
+
return a.timestamp.localeCompare(b.timestamp);
|
|
1027
|
+
});
|
|
1028
|
+
return turns;
|
|
1029
|
+
}
|
|
1030
|
+
function isCodexExport(value) {
|
|
1031
|
+
return Array.isArray(value) && value.length > 0 && typeof value[0].mapping === "object";
|
|
1032
|
+
}
|
|
1033
|
+
var codexAdapter = {
|
|
1034
|
+
name: "codex",
|
|
1035
|
+
async detect(filePath) {
|
|
1036
|
+
if (path10.extname(filePath).toLowerCase() !== CODEX_EXTENSION) return false;
|
|
1037
|
+
const raw = await readFile9(filePath, "utf-8").catch(() => "");
|
|
1038
|
+
if (raw.trimStart()[0] !== "[") return false;
|
|
1039
|
+
try {
|
|
1040
|
+
return isCodexExport(JSON.parse(raw));
|
|
1041
|
+
} catch {
|
|
1042
|
+
return false;
|
|
1043
|
+
}
|
|
1044
|
+
},
|
|
1045
|
+
async parse(filePath) {
|
|
1046
|
+
const raw = await readFile9(filePath, "utf-8");
|
|
1047
|
+
const parsed = parseJsonOrThrow(raw, filePath);
|
|
1048
|
+
if (!isCodexExport(parsed)) {
|
|
1049
|
+
throw new Error(
|
|
1050
|
+
`Codex session file does not contain a conversation array: ${filePath}`
|
|
1051
|
+
);
|
|
1052
|
+
}
|
|
1053
|
+
const conv = parsed[0];
|
|
1054
|
+
const turns = extractTurns(conv.mapping ?? {});
|
|
1055
|
+
const firstUser = turns.find((t) => t.role === "user");
|
|
1056
|
+
return {
|
|
1057
|
+
title: resolveSessionTitle(conv.title, firstUser?.content, "Codex Session"),
|
|
1058
|
+
adapter: "codex",
|
|
1059
|
+
startedAt: conv.create_time != null ? unixToIso(conv.create_time) : void 0,
|
|
1060
|
+
endedAt: conv.update_time != null ? unixToIso(conv.update_time) : void 0,
|
|
1061
|
+
participantIdentity: "OpenAI Codex",
|
|
1062
|
+
turns
|
|
1063
|
+
};
|
|
1064
|
+
}
|
|
1065
|
+
};
|
|
1066
|
+
|
|
1067
|
+
// src/adapters/cursor.ts
|
|
1068
|
+
import { readFile as readFile10 } from "fs/promises";
|
|
1069
|
+
import path11 from "path";
|
|
1070
|
+
var CURSOR_EXTENSION = ".json";
|
|
1071
|
+
function isTabsExport(value) {
|
|
1072
|
+
return typeof value === "object" && value !== null && "tabs" in value && Array.isArray(value.tabs);
|
|
1073
|
+
}
|
|
1074
|
+
function isFlatExport(value) {
|
|
1075
|
+
return typeof value === "object" && value !== null && "messages" in value && Array.isArray(value.messages);
|
|
1076
|
+
}
|
|
1077
|
+
function extractMessagesAndTitle(data) {
|
|
1078
|
+
if (isTabsExport(data)) {
|
|
1079
|
+
const tab = data.tabs[0];
|
|
1080
|
+
return { messages: tab?.messages ?? [], title: tab?.title };
|
|
1081
|
+
}
|
|
1082
|
+
return { messages: data.messages, title: data.title };
|
|
1083
|
+
}
|
|
1084
|
+
function toTurns(messages) {
|
|
1085
|
+
const turns = [];
|
|
1086
|
+
for (const msg of messages) {
|
|
1087
|
+
const role = msg.role;
|
|
1088
|
+
if (role !== "user" && role !== "assistant") continue;
|
|
1089
|
+
const content = (msg.content ?? "").trim();
|
|
1090
|
+
if (content.length === 0) continue;
|
|
1091
|
+
turns.push({ role, content, timestamp: msg.timestamp });
|
|
1092
|
+
}
|
|
1093
|
+
return turns;
|
|
1094
|
+
}
|
|
1095
|
+
var cursorAdapter = {
|
|
1096
|
+
name: "cursor",
|
|
1097
|
+
async detect(filePath) {
|
|
1098
|
+
if (path11.extname(filePath).toLowerCase() !== CURSOR_EXTENSION) return false;
|
|
1099
|
+
const raw = await readFile10(filePath, "utf-8").catch(() => "");
|
|
1100
|
+
if (raw.trimStart()[0] !== "{") return false;
|
|
1101
|
+
try {
|
|
1102
|
+
const parsed = JSON.parse(raw);
|
|
1103
|
+
return isTabsExport(parsed) || isFlatExport(parsed);
|
|
1104
|
+
} catch {
|
|
1105
|
+
return false;
|
|
1106
|
+
}
|
|
1107
|
+
},
|
|
1108
|
+
async parse(filePath) {
|
|
1109
|
+
const raw = await readFile10(filePath, "utf-8");
|
|
1110
|
+
const parsed = parseJsonOrThrow(raw, filePath);
|
|
1111
|
+
if (!isTabsExport(parsed) && !isFlatExport(parsed)) {
|
|
1112
|
+
throw new Error(
|
|
1113
|
+
`Cursor session file does not match a known Cursor export schema: ${filePath}`
|
|
1114
|
+
);
|
|
1115
|
+
}
|
|
1116
|
+
const { messages, title: rawTitle } = extractMessagesAndTitle(parsed);
|
|
1117
|
+
const turns = toTurns(messages);
|
|
1118
|
+
const firstUser = turns.find((t) => t.role === "user");
|
|
1119
|
+
const timestamps = turns.filter((t) => t.timestamp != null).map((t) => t.timestamp);
|
|
1120
|
+
return {
|
|
1121
|
+
title: resolveSessionTitle(rawTitle, firstUser?.content, "Cursor Session"),
|
|
1122
|
+
adapter: "cursor",
|
|
1123
|
+
startedAt: timestamps[0],
|
|
1124
|
+
endedAt: timestamps[timestamps.length - 1],
|
|
1125
|
+
participantIdentity: "Cursor AI",
|
|
1126
|
+
turns
|
|
1127
|
+
};
|
|
1128
|
+
}
|
|
1129
|
+
};
|
|
1130
|
+
|
|
1131
|
+
// src/adapters/registry.ts
|
|
1132
|
+
var ADAPTERS = [claudeAdapter, codexAdapter, cursorAdapter];
|
|
1133
|
+
async function detectAdapter(filePath) {
|
|
1134
|
+
for (const adapter of ADAPTERS) {
|
|
1135
|
+
if (await adapter.detect(filePath)) return adapter;
|
|
1136
|
+
}
|
|
1137
|
+
return null;
|
|
1138
|
+
}
|
|
1139
|
+
async function parseSessionFile(filePath) {
|
|
1140
|
+
const adapter = await detectAdapter(filePath);
|
|
1141
|
+
if (!adapter) {
|
|
1142
|
+
throw new Error(
|
|
1143
|
+
`No session adapter recognised the file: ${filePath}
|
|
1144
|
+
Supported formats: ${ADAPTERS.map((a) => a.name).join(", ")}`
|
|
1145
|
+
);
|
|
1146
|
+
}
|
|
1147
|
+
const session = await adapter.parse(filePath);
|
|
1148
|
+
assertSessionHasUsableTurns(session, filePath);
|
|
1149
|
+
return session;
|
|
1150
|
+
}
|
|
1151
|
+
function assertSessionHasUsableTurns(session, filePath) {
|
|
1152
|
+
const hasUsableTurn = session.turns.some(
|
|
1153
|
+
(t) => (t.role === "user" || t.role === "assistant") && t.content.trim().length > 0
|
|
1154
|
+
);
|
|
1155
|
+
if (!hasUsableTurn) {
|
|
1156
|
+
throw new Error(
|
|
1157
|
+
`${session.adapter} session has no usable turns: ${filePath}
|
|
1158
|
+
The file matches the ${session.adapter} export shape, but no user or assistant message with content was found. Re-export the session or delete the file if it is empty.`
|
|
1159
|
+
);
|
|
1160
|
+
}
|
|
1161
|
+
}
|
|
1162
|
+
function formatSessionAsMarkdown(session) {
|
|
1163
|
+
const lines = [];
|
|
1164
|
+
for (const turn of session.turns) {
|
|
1165
|
+
const label = turn.role === "user" ? "User" : session.participantIdentity ?? "Assistant";
|
|
1166
|
+
const heading = turn.timestamp ? `### ${label} _(${turn.timestamp})_` : `### ${label}`;
|
|
1167
|
+
lines.push(heading);
|
|
1168
|
+
lines.push("");
|
|
1169
|
+
lines.push(turn.content);
|
|
1170
|
+
lines.push("");
|
|
1171
|
+
}
|
|
1172
|
+
return lines.join("\n").trimEnd();
|
|
1173
|
+
}
|
|
1174
|
+
|
|
1175
|
+
// src/commands/ingest-session.ts
|
|
1176
|
+
/**
 * Assemble the frontmatter metadata for an ingested session and hand it to
 * `buildFrontmatter`. The optional keys (sessionStartedAt, sessionEndedAt,
 * participant) are only included when the session carries a truthy value.
 *
 * @param {object} session - Parsed session.
 * @param {string} sourcePath - Recorded under the `source` key.
 * @returns {*} Whatever `buildFrontmatter` returns for the metadata.
 */
function buildSessionFrontmatter(session, sourcePath) {
  const meta = {
    title: session.title,
    source: sourcePath,
    adapter: session.adapter,
    ingestedAt: new Date().toISOString(),
    ...(session.startedAt ? { sessionStartedAt: session.startedAt } : {}),
    ...(session.endedAt ? { sessionEndedAt: session.endedAt } : {}),
    ...(session.participantIdentity ? { participant: session.participantIdentity } : {}),
  };
  return buildFrontmatter(meta);
}
|
|
1188
|
+
/**
 * Build the source document for a session (frontmatter, blank line,
 * Markdown transcript, trailing newline) and persist it via `saveSource`.
 *
 * @param {object} session - Parsed session.
 * @param {string} sourcePath - Path the session was read from.
 * @returns {Promise<*>} The result of `saveSource`.
 */
async function saveSessionSource(session, sourcePath) {
  const header = buildSessionFrontmatter(session, sourcePath);
  const transcript = formatSessionAsMarkdown(session);
  return saveSource(session.title, `${header}\n\n${transcript}\n`, sourcePath);
}
|
|
1197
|
+
/**
 * Ingest a single session file: parse it, save it as a source document,
 * and report progress through `status`.
 *
 * @param {string} filePath - Session export to ingest.
 * @returns {Promise<{filename: string, adapter: string, title: string, source: string}>}
 *   Summary of what was saved; `filename` is the basename of the saved path.
 */
async function ingestSessionFile(filePath) {
  status("*", info(`Ingesting session: ${filePath}`));

  const session = await parseSessionFile(filePath);
  const savedPath = await saveSessionSource(session, filePath);
  const savedName = path12.basename(savedPath);

  const summary = `Saved ${bold(savedName)} [${session.adapter}] \u2192 ${source(savedPath)}`;
  status("+", success(summary));

  return {
    filename: path12.basename(savedPath),
    adapter: session.adapter,
    title: session.title,
    source: filePath,
  };
}
|
|
1214
|
+
/**
 * List the regular files directly inside a directory (non-recursive).
 *
 * Each entry is `stat`ed, so symlinks are followed. Entries that no longer
 * resolve (dangling symlinks, files removed between `readdir` and `stat`)
 * are skipped instead of aborting the whole listing; any other stat failure
 * is rethrown.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Promise<string[]>} Paths (dirPath joined with the entry name) of
 *   the entries that are regular files.
 */
async function listDirectoryFiles(dirPath) {
  const entries = await readdir(dirPath);
  const files = [];
  for (const entry of entries) {
    const full = path12.join(dirPath, entry);
    let entryInfo;
    try {
      entryInfo = await stat(full);
    } catch (err) {
      // A vanished or dangling entry is not a file to ingest; keep scanning.
      if (err && err.code === "ENOENT") continue;
      throw err;
    }
    if (entryInfo.isFile()) files.push(full);
  }
  return files;
}
|
|
1224
|
+
/**
 * Ingest every file found directly inside a directory. A file that fails to
 * ingest is reported as skipped and does not stop the remaining files.
 *
 * @param {string} dirPath - Directory containing session exports.
 * @returns {Promise<void>}
 * @throws {Error} When the directory has no files, or when none of them
 *   could be imported.
 */
async function ingestDirectory(dirPath) {
  const files = await listDirectoryFiles(dirPath);
  if (files.length === 0) {
    throw new Error(`No files found in directory: ${dirPath}`);
  }
  status("*", info(`Scanning ${files.length} file(s) in: ${dirPath}`));

  const tally = { imported: 0, skipped: 0 };
  for (const file of files) {
    try {
      await ingestSessionFile(file);
      tally.imported += 1;
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      status("!", warn(`Skipped ${path12.basename(file)}: ${reason}`));
      tally.skipped += 1;
    }
  }

  if (tally.imported === 0) {
    throw new Error(
      `No sessions imported from ${dirPath} (${tally.skipped} file(s) skipped). Check that at least one file is in a supported session format.`
    );
  }
  status("\u2192", dim(`Imported ${tally.imported} session(s), skipped ${tally.skipped}.`));
}
|
|
1252
|
+
/**
 * Entry point for session ingestion: dispatch to directory or single-file
 * ingestion depending on what `targetPath` points at, then print the
 * follow-up hint.
 *
 * @param {string} targetPath - File or directory to ingest.
 * @returns {Promise<void>}
 * @throws {Error} "Path not found: …" when the path cannot be stat'ed; the
 *   underlying filesystem error is attached as `cause` so that permission
 *   problems and similar failures remain diagnosable.
 */
async function ingestSession(targetPath) {
  let targetInfo;
  try {
    targetInfo = await stat(targetPath);
  } catch (err) {
    throw new Error(`Path not found: ${targetPath}`, { cause: err });
  }
  if (targetInfo.isDirectory()) {
    await ingestDirectory(targetPath);
  } else {
    await ingestSessionFile(targetPath);
  }
  status("\u2192", dim("Next: llmwiki compile"));
}
|
|
1263
|
+
|
|
1264
|
+
// src/commands/view.ts
|
|
1265
|
+
import { spawn } from "child_process";
|
|
1266
|
+
|
|
1267
|
+
// src/viewer/server.ts
|
|
1268
|
+
import http from "http";
|
|
1269
|
+
|
|
1270
|
+
// src/linter/cache.ts
|
|
1271
|
+
import { mkdir as mkdir3, readFile as readFile11 } from "fs/promises";
|
|
1272
|
+
import path13 from "path";
|
|
1273
|
+
var LINT_CACHE_TIMESTAMP_PATTERN = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}Z$/;
|
|
1274
|
+
/**
 * Persist the latest lint counts under the wiki root. Ensures the
 * LLMWIKI_DIR directory exists, then writes a pretty-printed JSON entry
 * (warnings, errors, current ISO timestamp) to LAST_LINT_FILE via
 * `atomicWrite`.
 *
 * @param {string} root - Wiki root directory.
 * @param {{warnings: number, errors: number}} summary - Lint totals to record.
 * @returns {Promise<void>}
 */
async function writeLintCache(root, summary) {
  await mkdir3(path13.join(root, LLMWIKI_DIR), { recursive: true });
  const { warnings, errors } = summary;
  const entry = { warnings, errors, at: new Date().toISOString() };
  const serialized = JSON.stringify(entry, null, 2);
  await atomicWrite(path13.join(root, LAST_LINT_FILE), `${serialized}\n`);
}
|
|
1284
|
+
/**
 * Load the cached lint summary written by a previous lint run.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<{warnings: number, errors: number, at: string}|null>}
 *   The cached entry, or null when the file cannot be read, is not valid
 *   JSON, or fails `isValidEntry`.
 */
async function readLintCache(root) {
  let parsed;
  try {
    const raw = await readFile11(path13.join(root, LAST_LINT_FILE), "utf-8");
    parsed = JSON.parse(raw);
  } catch {
    // Missing/unreadable file and malformed JSON both mean "no cache".
    return null;
  }
  if (!isValidEntry(parsed)) {
    return null;
  }
  // Copy only the known fields so extra keys in the file are not passed on.
  const { warnings, errors, at } = parsed;
  return { warnings, errors, at };
}
|
|
1300
|
+
/**
 * @param {*} value - Candidate value.
 * @returns {boolean} True only for number primitives that are integers >= 0.
 */
function isNonNegativeInteger(value) {
  if (typeof value !== "number") return false;
  if (!Number.isInteger(value)) return false;
  return value >= 0;
}
|
|
1303
|
+
/**
 * Check that a parsed lint-cache value has the expected shape: non-negative
 * integer `warnings` and `errors`, and an `at` string matching
 * LINT_CACHE_TIMESTAMP_PATTERN.
 *
 * @param {*} value - Parsed JSON value.
 * @returns {boolean}
 */
function isValidEntry(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  const { warnings, errors, at } = value;
  if (!isNonNegativeInteger(warnings)) return false;
  if (!isNonNegativeInteger(errors)) return false;
  if (typeof at !== "string") return false;
  return LINT_CACHE_TIMESTAMP_PATTERN.test(at);
}
|
|
1308
|
+
|
|
1309
|
+
// src/viewer/health.ts
|
|
1310
|
+
/**
 * Build the health payload from a snapshot: the snapshot's counts plus the
 * cached lint summary (null when `readLintCache` has nothing to return).
 *
 * @param {{root: string, counts: object}} snapshot
 * @returns {Promise<object>} Counts keyed as pendingReviews, sources,
 *   sourceFiles, concepts, queries, plus `lint`.
 */
async function buildHealthResponse(snapshot) {
  const lint2 = await readLintCache(snapshot.root);
  const { pendingReviews, compiledSources, sourceFiles, concepts, queries } = snapshot.counts;
  return {
    pendingReviews,
    sources: compiledSources,
    sourceFiles,
    concepts,
    queries,
    lint: lint2,
  };
}
|
|
1321
|
+
|
|
1322
|
+
// src/viewer/shell.ts
|
|
1323
|
+
import { readFile as readFile12 } from "fs/promises";
|
|
1324
|
+
import path14 from "path";
|
|
1325
|
+
var PAGE_INDEX_MARKER = "<!--PAGE_INDEX-->";
|
|
1326
|
+
var templateCache = /* @__PURE__ */ new Map();
|
|
1327
|
+
/**
 * Read `index.html` from the assets directory, memoising the result per
 * directory in `templateCache`. A failed read is cached as null, so later
 * calls for the same directory do not touch the filesystem again.
 *
 * @param {string} assetsDir - Directory containing `index.html`.
 * @returns {Promise<string|null>} Template text, or null when it could not be read.
 */
async function loadShellTemplate(assetsDir) {
  const remembered = templateCache.get(assetsDir);
  if (remembered !== undefined) {
    return remembered;
  }
  const template = await readFile12(path14.join(assetsDir, "index.html"), "utf-8").then(
    (text) => text,
    () => null
  );
  templateCache.set(assetsDir, template);
  return template;
}
|
|
1339
|
+
/**
 * Replace the first PAGE_INDEX_MARKER in the shell template with an inline
 * `<script type="application/json" id="page-index">` block listing the pages.
 *
 * Each page is reduced to id, pageDirectory, slug, title and kind; `kind`
 * falls back to "concept" when the frontmatter has no non-empty string kind.
 * Every `<` in the JSON is written as `\u003c` so page data cannot close the
 * script element.
 *
 * @param {string} template - Shell HTML containing the marker.
 * @param {Array<object>} pages - Pages to embed.
 * @returns {string} The template with the page index embedded.
 */
function substitutePageIndex(template, pages) {
  const embedded = pages.map((page) => ({
    id: page.id,
    pageDirectory: page.pageDirectory,
    slug: page.slug,
    title: page.title,
    kind: typeof page.frontmatter.kind === "string" && page.frontmatter.kind.length > 0 ? page.frontmatter.kind : "concept"
  }));
  const json = JSON.stringify({ pages: embedded }).replace(/</g, "\\u003c");
  const block = `<script type="application/json" id="page-index">${json}</script>`;
  // Use a replacer function: a replacement *string* would expand `$&`, `$'`,
  // "$`" and `$$`, corrupting the JSON whenever page data contains them.
  return template.replace(PAGE_INDEX_MARKER, () => block);
}
|
|
1351
|
+
|
|
1352
|
+
// src/viewer/static-assets.ts
|
|
1353
|
+
import { readFile as readFile13, realpath } from "fs/promises";
|
|
1354
|
+
import path16 from "path";
|
|
1355
|
+
import { fileURLToPath } from "url";
|
|
1356
|
+
|
|
1357
|
+
// src/viewer/path-safety.ts
|
|
1358
|
+
import path15 from "path";
|
|
1359
|
+
/**
 * Error thrown when a slug or path segment fails a path-safety check.
 * Instances carry `name === "PathSafetyError"`.
 */
var PathSafetyError = class extends Error {
  /**
   * @param {string} message - Description of the rejected input.
   */
  constructor(message) {
    super(message);
    const errorName = "PathSafetyError";
    this.name = errorName;
  }
};
|
|
1365
|
+
/**
 * Validate that an already-decoded slug is a single, plain path segment.
 * Rejects non-strings, the empty string, "." and "..", anything containing
 * "/" or "\", NUL bytes, and the platform separator when it is not "/".
 *
 * @param {*} decodedSlug - Slug after URL decoding.
 * @returns {void}
 * @throws {PathSafetyError} On the first failed check.
 */
function assertSafeSlug(decodedSlug) {
  const reject = (reason) => {
    throw new PathSafetyError(reason);
  };

  if (typeof decodedSlug !== "string") reject("slug must be a string");
  if (decodedSlug.length === 0) reject("slug must not be empty");
  if (decodedSlug === "." || decodedSlug === "..") {
    reject(`slug must not be "${decodedSlug}"`);
  }

  const hasSeparator = ["/", "\\"].some((sep) => decodedSlug.includes(sep));
  if (hasSeparator) reject("slug must not contain path separators");
  if (decodedSlug.includes("\0")) reject("slug must not contain NUL bytes");

  const platformSep = path15.sep;
  if (platformSep !== "/" && decodedSlug.includes(platformSep)) {
    reject(`slug must not contain platform separator "${platformSep}"`);
  }
}
|
|
1385
|
+
|
|
1386
|
+
// src/viewer/static-assets.ts
|
|
1387
|
+
var ASSETS_DIR = path16.join(
|
|
1388
|
+
path16.dirname(fileURLToPath(import.meta.url)),
|
|
1389
|
+
"viewer/assets"
|
|
1390
|
+
);
|
|
1391
|
+
var ASSET_CONTENT_TYPES = {
|
|
1392
|
+
".html": "text/html; charset=utf-8",
|
|
1393
|
+
".css": "text/css; charset=utf-8",
|
|
1394
|
+
".js": "application/javascript; charset=utf-8",
|
|
1395
|
+
".svg": "image/svg+xml",
|
|
1396
|
+
".png": "image/png"
|
|
1397
|
+
};
|
|
1398
|
+
/**
 * Serve a static viewer asset for an `/assets/…` request path.
 *
 * Responds 400 when the path cannot be decoded into safe segments, and 404
 * when the path is empty, the extension is not in ASSET_CONTENT_TYPES, the
 * file does not resolve, or sending it fails. On success the file bytes are
 * sent with status 200 and the content type mapped from the extension.
 *
 * @param {object} res - Response object (statusCode, setHeader, end).
 * @param {string} pathname - Request pathname.
 * @returns {Promise<void>}
 */
async function handleAsset(res, pathname) {
  const notFound = () => writeAssetError(res, 404, "asset_not_found", "Asset not found.");

  const segments = decodeAssetSegments(pathname);
  if (!segments) {
    writeAssetError(res, 400, "bad_asset_path", "Bad asset path.");
    return;
  }
  if (segments.length === 0) {
    notFound();
    return;
  }

  const fileName = segments[segments.length - 1];
  const extension = path16.extname(fileName).toLowerCase();
  const contentType = ASSET_CONTENT_TYPES[extension];
  if (!contentType) {
    notFound();
    return;
  }

  const resolved = await resolveAssetPath(segments);
  if (!resolved) {
    notFound();
    return;
  }

  try {
    const body = await readFile13(resolved);
    res.statusCode = 200;
    res.setHeader("Content-Type", contentType);
    res.end(body);
  } catch {
    notFound();
  }
}
|
|
1427
|
+
/**
 * Split an `/assets/…` pathname into URL-decoded segments, validating each
 * one with `assertSafeSlug`.
 *
 * @param {string} pathname - Request pathname.
 * @returns {string[]|null} Decoded segments ([] when nothing follows the
 *   `/assets/` prefix), or null when a segment fails to decode or is unsafe.
 * @throws Any non-PathSafetyError raised by `assertSafeSlug`.
 */
function decodeAssetSegments(pathname) {
  const relative = pathname.replace(/^\/assets\//, "");
  if (relative.length === 0) {
    return [];
  }

  const segments = [];
  for (const encoded of relative.split("/")) {
    let piece;
    try {
      piece = decodeURIComponent(encoded);
    } catch {
      // Malformed percent-encoding.
      return null;
    }

    try {
      assertSafeSlug(piece);
    } catch (err) {
      if (!(err instanceof PathSafetyError)) {
        throw err;
      }
      return null;
    }

    segments.push(piece);
  }
  return segments;
}
|
|
1448
|
+
/**
 * Resolve asset segments to a real path and confirm the result stays inside
 * the real ASSETS_DIR (so symlinks cannot lead outside it).
 *
 * @param {string[]} segments - Decoded path segments below ASSETS_DIR.
 * @returns {Promise<string|null>} The real path, or null when the candidate
 *   does not exist or resolves outside the assets directory.
 */
async function resolveAssetPath(segments) {
  const candidate = path16.join(ASSETS_DIR, ...segments);

  let target;
  try {
    target = await realpath(candidate);
  } catch {
    return null;
  }

  // Fall back to the unresolved directory if it cannot itself be resolved.
  const base = await realpath(ASSETS_DIR).catch(() => ASSETS_DIR);
  if (target === base) {
    return target;
  }

  const baseWithSep = base.endsWith(path16.sep) ? base : base + path16.sep;
  if (!target.startsWith(baseWithSep)) {
    return null;
  }
  return target;
}
|
|
1461
|
+
/**
 * Send a JSON error response of the form `{ error: { code, message } }`.
 *
 * @param {object} res - Response object (statusCode, setHeader, end).
 * @param {number} status2 - HTTP status code.
 * @param {string} code - Machine-readable error code.
 * @param {string} message - Human-readable message.
 * @returns {void}
 */
function writeAssetError(res, status2, code, message) {
  const payload = { error: { code, message } };
  res.statusCode = status2;
  res.setHeader("Content-Type", "application/json; charset=utf-8");
  res.end(JSON.stringify(payload));
}
|
|
1466
|
+
|
|
1467
|
+
// src/viewer/render.ts
|
|
1468
|
+
import MarkdownIt from "markdown-it";
|
|
1469
|
+
import sanitizeHtml from "sanitize-html";
|
|
1470
|
+
|
|
1471
|
+
// src/wiki/collect.ts
|
|
1472
|
+
import { readdir as readdir2, readFile as readFile14, realpath as realpath2 } from "fs/promises";
|
|
1473
|
+
import path17 from "path";
|
|
1474
|
+
var WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
|
|
1475
|
+
/**
 * Collect the distinct slugs referenced by `[[target]]` / `[[target|alias]]`
 * links in a page body, in order of first appearance.
 *
 * @param {string} body - Page body text.
 * @returns {string[]} Slugified link targets without duplicates.
 */
function extractWikilinkSlugs(body) {
  // WIKILINK_RE is a shared global regex; start scanning from the beginning.
  WIKILINK_RE.lastIndex = 0;
  const unique = new Set();
  for (const [, target] of String(body).matchAll(WIKILINK_RE)) {
    unique.add(slugify(target.trim()));
  }
  return Array.from(unique);
}
|
|
1484
|
+
/**
 * `realpath` that reports failure as null instead of rejecting.
 *
 * @param {string} p - Path to resolve.
 * @returns {Promise<string|null>} The resolved path, or null on any error.
 */
async function safeRealpath(p) {
  let resolved = null;
  try {
    resolved = await realpath2(p);
  } catch {
    resolved = null;
  }
  return resolved;
}
|
|
1491
|
+
/**
 * String-based containment check: true when `child` equals `dir` or begins
 * with `dir` followed by the platform path separator.
 *
 * @param {string} child - Path to test.
 * @param {string} dir - Directory path.
 * @returns {boolean}
 */
function isInsideDir(child, dir) {
  if (child === dir) {
    return true;
  }
  const { sep } = path17;
  const dirWithSep = dir.endsWith(sep) ? dir : `${dir}${sep}`;
  return child.startsWith(dirWithSep);
}
|
|
1496
|
+
/**
 * Read a wiki page file and split it into frontmatter and body.
 *
 * @param {string} filePath - File to read.
 * @param {string} slug - Slug recorded on the result.
 * @param {string} pageDirectory - Page directory recorded on the result.
 * @returns {Promise<object|null>} The parsed page (slug, pageDirectory,
 *   filePath, title, frontmatter, body, parseStatus), or null when the file
 *   cannot be read. `title` is undefined unless the frontmatter title is a
 *   non-empty string.
 */
async function parsePageFile(filePath, slug, pageDirectory) {
  let raw;
  try {
    raw = await readFile14(filePath, "utf-8");
  } catch {
    return null;
  }

  const status = parseFrontmatterStatus(raw);
  const frontmatter = status.meta;
  const hasUsableTitle = typeof frontmatter.title === "string" && frontmatter.title.length > 0;
  const title = hasUsableTitle ? frontmatter.title : undefined;

  return {
    slug,
    pageDirectory,
    filePath,
    title,
    frontmatter,
    body: status.body,
    parseStatus: {
      hasFrontmatterBlock: status.hasFrontmatterBlock,
      malformedFrontmatter: status.malformedFrontmatter,
      hasTitle: title !== undefined,
      orphaned: frontmatter.orphaned === true,
    },
  };
}
|
|
1520
|
+
/**
 * Parse every `.md` file in one wiki subdirectory.
 *
 * The subdirectory is used only when its real path equals
 * `canonicalRoot/subdir`; otherwise (missing, or reached through a symlink)
 * nothing is collected. Files whose real path falls outside that directory
 * are skipped, as are files that fail to parse.
 *
 * @param {string} canonicalRoot - Real path of the wiki root.
 * @param {string} pageDirectory - Label stored on each collected page.
 * @param {string} subdir - Subdirectory, relative to the root, to scan.
 * @returns {Promise<object[]>} Parsed pages.
 */
async function collectFromDir(canonicalRoot, pageDirectory, subdir) {
  const expectedDir = path17.join(canonicalRoot, subdir);
  const realDir = await safeRealpath(expectedDir);
  if (realDir !== expectedDir) {
    return [];
  }

  let names;
  try {
    names = await readdir2(realDir);
  } catch {
    return [];
  }

  const pages = [];
  const markdownNames = names.filter((name) => name.endsWith(".md"));
  for (const name of markdownNames) {
    const resolved = await safeRealpath(path17.join(realDir, name));
    const contained = Boolean(resolved) && isInsideDir(resolved, realDir);
    if (!contained) {
      continue;
    }
    const slug = name.replace(/\.md$/, "");
    const page = await parsePageFile(resolved, slug, pageDirectory);
    if (page) {
      pages.push(page);
    }
  }
  return pages;
}
|
|
1541
|
+
/**
 * Collect the parsed pages from the concepts and queries directories of a
 * wiki root. Both directories are scanned concurrently; concept pages come
 * first in the result.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} All pages, or [] when the root cannot be resolved.
 */
async function collectRawWikiPages(root) {
  const canonicalRoot = await safeRealpath(root);
  if (!canonicalRoot) {
    return [];
  }
  const conceptsPending = collectFromDir(canonicalRoot, "concepts", CONCEPTS_DIR);
  const queriesPending = collectFromDir(canonicalRoot, "queries", QUERIES_DIR);
  const groups = await Promise.all([conceptsPending, queriesPending]);
  return groups.flat();
}
|
|
1550
|
+
|
|
1551
|
+
// src/viewer/collect.ts
|
|
1552
|
+
/**
 * Collect the wiki pages under `root` and decorate them for the viewer.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} The result of `decoratePages` on the raw pages.
 */
async function collectViewerPages(root) {
  const rawPages = await collectRawWikiPages(root);
  const decorated = decoratePages(rawPages);
  return decorated;
}
|
|
1556
|
+
/**
 * Resolve a bare slug to a page id, preferring a page in "concepts" over a
 * page in "queries" with the same slug.
 *
 * @param {string} slug - Slug without a directory prefix.
 * @param {Array<{id: string, slug: string, pageDirectory: string}>} pages
 * @returns {string|null} The matching page id, or null when the slug is
 *   empty or no page matches.
 */
function resolveBareSlug(slug, pages) {
  if (slug.length === 0) {
    return null;
  }
  for (const directory of ["concepts", "queries"]) {
    const hit = pages.find((p) => p.pageDirectory === directory && p.slug === slug);
    if (hit) {
      return hit.id;
    }
  }
  return null;
}
|
|
1564
|
+
/**
 * Resolve a list of bare slugs to page ids, dropping unresolved slugs and
 * duplicates while keeping first-seen order.
 *
 * @param {Iterable<string>} targets - Bare slugs.
 * @param {Array<object>} pages - Pages to resolve against.
 * @returns {string[]} Distinct resolved page ids.
 */
function resolveBareSlugList(targets, pages) {
  // A Set iterates in insertion order, which gives first-seen ordering.
  const ids = new Set();
  for (const target of targets) {
    const id = resolveBareSlug(target, pages);
    if (id) {
      ids.add(id);
    }
  }
  return [...ids];
}
|
|
1576
|
+
/**
 * Turn raw pages into viewer page shells and fill in each shell's
 * `outgoingLinks` with the ids of the pages its body links to.
 *
 * @param {Array<object>} raw - Raw parsed pages.
 * @returns {Array<object>} Page shells with `outgoingLinks` populated.
 */
function decoratePages(raw) {
  const shells = raw.map((page) => buildPageShell(page));
  // Links are resolved only after every shell exists, so a page can link to
  // any other page regardless of order.
  shells.forEach((shell) => {
    const linkedSlugs = extractWikilinkSlugs(shell.body);
    shell.outgoingLinks = resolveBareSlugList(linkedSlugs, shells);
  });
  return shells;
}
|
|
1584
|
+
/**
 * Build the viewer representation of a raw page. The id is
 * `<pageDirectory>/<slug>`, the title falls back to the slug, and
 * `outgoingLinks` starts empty.
 *
 * @param {object} page - Raw parsed page.
 * @returns {object} Page shell with id, slug, pageDirectory, title, filePath,
 *   frontmatter, body, outgoingLinks, citations and warnings.
 */
function buildPageShell(page) {
  const { slug, pageDirectory, filePath, frontmatter, body } = page;
  const citations = extractClaimCitations(body);
  const warnings = warningsFromParseStatus(page);
  return {
    id: `${pageDirectory}/${slug}`,
    slug,
    pageDirectory,
    title: page.title ?? slug,
    filePath,
    frontmatter,
    body,
    outgoingLinks: [],
    citations,
    warnings,
  };
}
|
|
1599
|
+
/**
 * Translate a page's parse status into viewer warnings. A missing
 * frontmatter block takes precedence over malformed frontmatter; a missing
 * title is reported independently of both.
 *
 * @param {{slug: string, parseStatus: object}} page
 * @returns {Array<{code: string, message: string}>}
 */
function warningsFromParseStatus(page) {
  const { slug, parseStatus } = page;
  const warnings = [];
  const warn = (code, message) => warnings.push({ code, message });

  if (!parseStatus.hasFrontmatterBlock) {
    warn("missing_frontmatter", `Page "${slug}" has no frontmatter block.`);
  } else if (parseStatus.malformedFrontmatter) {
    warn("malformed_frontmatter", `Page "${slug}" has malformed YAML frontmatter.`);
  }

  if (!parseStatus.hasTitle) {
    warn("missing_title", `Page "${slug}" has no frontmatter title; displaying slug.`);
  }
  return warnings;
}
|
|
1620
|
+
|
|
1621
|
+
// src/viewer/markdown-it-helpers.ts
|
|
1622
|
+
/**
 * Escape the five HTML-significant characters so the text can be placed in
 * element content or inside a quoted attribute value.
 *
 * @param {string} input - Raw text.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeHtml(input) {
  // `&` must be handled first, otherwise the ampersands introduced by the
  // later replacements would be escaped a second time.
  return input
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#39;");
}
|
|
1625
|
+
/**
 * Read the `linkLevel` of an inline parser state.
 *
 * @param {object} state - Inline state object.
 * @returns {number} `state.linkLevel` when it is a number, otherwise 0.
 */
function currentLinkLevel(state) {
  const { linkLevel } = state;
  if (typeof linkLevel !== "number") {
    return 0;
  }
  return linkLevel;
}
|
|
1629
|
+
/**
 * Decide whether a custom inline rule should decline to run: either the
 * state's link level is above zero, or the rule was invoked in silent mode.
 *
 * @param {object} state - Inline state object.
 * @param {*} silent - Silent-mode flag passed to the rule.
 * @returns {boolean}
 */
function shouldDeferInlineRule(state, silent) {
  const insideLink = currentLinkLevel(state) > 0;
  return insideLink || Boolean(silent);
}
|
|
1634
|
+
|
|
1635
|
+
// src/viewer/wikilink-rule.ts
|
|
1636
|
+
var OPEN = "[";
|
|
1637
|
+
var CHAR_OPEN_BRACKET = 91;
|
|
1638
|
+
/**
 * Install the wikilink inline rule (placed after the built-in "link" rule)
 * and its renderer on a markdown-it instance.
 *
 * @param {object} md - markdown-it instance.
 * @param {{pages: Array<object>}} context - Passed to the rule's parser.
 * @returns {void}
 */
function registerWikilink(md, context) {
  const parser = buildParser(context);
  md.inline.ruler.after("link", "wikilink", parser);
  md.renderer.rules.wikilink = function renderWikilink(tokens, idx) {
    return renderWikilinkToken(tokens[idx]);
  };
}
|
|
1642
|
+
/**
 * Create the inline rule that recognises `[[target]]` / `[[target|alias]]`.
 *
 * The returned rule declines (returns false) when the text at the current
 * position does not start with `[[`, when `shouldDeferInlineRule` says so,
 * when no closing `]]` follows, or when the text between the brackets
 * contains a newline or another `[`. Otherwise it pushes a wikilink token
 * and moves `state.pos` past the closing brackets.
 *
 * @param {{pages: Array<object>}} context - Pages used to resolve targets.
 * @returns {(state: object, silent: boolean) => boolean}
 */
function buildParser(context) {
  return function parseWikilink(state, silent) {
    const start = state.pos;
    const text = state.src;

    const firstIsBracket = text.charCodeAt(start) === CHAR_OPEN_BRACKET;
    if (!firstIsBracket) return false;
    const secondIsBracket = text.charCodeAt(start + 1) === CHAR_OPEN_BRACKET;
    if (!secondIsBracket) return false;
    if (shouldDeferInlineRule(state, silent)) return false;

    const contentStart = start + 2;
    const closeAt = text.indexOf("]]", contentStart);
    if (closeAt < 0) return false;

    const inner = text.slice(contentStart, closeAt);
    const malformed = inner.includes("\n") || inner.includes(OPEN);
    if (malformed) return false;

    const parts = splitTargetAndAlias(inner);
    const slug = slugify(parts.rawTarget.trim());
    pushWikilinkToken(state, resolveBareSlug(slug, context.pages), slug, parts.display);
    state.pos = closeAt + 2;
    return true;
  };
}
|
|
1659
|
+
/**
 * Split the inside of a wikilink at its first `|` into target and display
 * text. Without a pipe the trimmed inner text is the display text; with an
 * empty alias the trimmed target is used instead.
 *
 * @param {string} inner - Text between `[[` and `]]`.
 * @returns {{rawTarget: string, display: string}} `rawTarget` is untrimmed.
 */
function splitTargetAndAlias(inner) {
  const pipeAt = inner.indexOf("|");
  if (pipeAt === -1) {
    return { rawTarget: inner, display: inner.trim() };
  }
  const rawTarget = inner.slice(0, pipeAt);
  const alias = inner.slice(pipeAt + 1).trim();
  return { rawTarget, display: alias || rawTarget.trim() };
}
|
|
1667
|
+
/**
 * Push a "wikilink" token onto the inline state and attach the link details
 * as its `meta`.
 *
 * @param {object} state - Inline state exposing `push`.
 * @param {string|null} resolved - Resolved page id, or null.
 * @param {string} slug - Slugified link target.
 * @param {string} display - Text to display.
 * @returns {void}
 */
function pushWikilinkToken(state, resolved, slug, display) {
  const meta = { resolved, slug, display };
  state.push("wikilink", "", 0).meta = meta;
}
|
|
1671
|
+
/**
 * Render a wikilink token to HTML. Unresolved links become a
 * `<span data-missing="true">` showing the bracketed text; resolved links
 * become an anchor pointing at `#/<encoded page id>`.
 *
 * @param {{meta: {resolved: string|null, slug: string, display: string}}} token
 * @returns {string} HTML fragment.
 */
function renderWikilinkToken(token) {
  const { resolved, slug, display } = token.meta;
  const label = escapeHtml(display || slug);
  if (!resolved) {
    return `<span data-missing="true">[[${label}]]</span>`;
  }
  const target = `#/${encodeUriSegment(resolved)}`;
  const pageIdAttr = escapeHtml(resolved);
  const hrefAttr = escapeHtml(target);
  return `<a class="wikilink" data-page-id="${pageIdAttr}" href="${hrefAttr}">${label}</a>`;
}
|
|
1680
|
+
/**
 * Percent-encode the first two `/`-separated parts of a page id and rejoin
 * them with `/`.
 *
 * @param {string} id - Page id; presumably always `<directory>/<slug>`, as
 *   built elsewhere in this file — confirm against callers.
 * @returns {string} `<encoded directory>/<encoded slug>`.
 */
function encodeUriSegment(id) {
  const parts = id.split("/");
  const directory = encodeURIComponent(parts[0]);
  const slug = encodeURIComponent(parts[1]);
  return `${directory}/${slug}`;
}
|
|
1684
|
+
|
|
1685
|
+
// src/viewer/citation-rule.ts
|
|
1686
|
+
import path18 from "path";
|
|
1687
|
+
import { pathToFileURL } from "url";
|
|
1688
|
+
var CHAR_CARET = 94;
|
|
1689
|
+
var CHAR_OPEN_BRACKET2 = 91;
|
|
1690
|
+
/**
 * Install the citation inline rule (placed after the built-in "link" rule)
 * and its renderer on a markdown-it instance.
 *
 * @param {object} md - markdown-it instance.
 * @param {object} context - Passed to the rule's parser.
 * @returns {void}
 */
function registerCitation(md, context) {
  const parser = buildParser2(context);
  md.inline.ruler.after("link", "citation", parser);
  md.renderer.rules.citation = function renderCitation(tokens, idx) {
    return renderCitationToken(tokens[idx]);
  };
}
|
|
1694
|
+
/**
 * Create the inline rule that recognises `^[...]` citation markers.
 *
 * The returned rule declines (returns false) when the text at the current
 * position does not start with `^[`, when `shouldDeferInlineRule` says so,
 * when no `]` follows, or when the marker spans a newline. Otherwise it
 * pushes chip tokens for the citations extracted from the marker and moves
 * `state.pos` past the closing bracket.
 *
 * @param {object} context - Forwarded to `pushChipTokens`.
 * @returns {(state: object, silent: boolean) => boolean}
 */
function buildParser2(context) {
  return function parseCitation(state, silent) {
    const start = state.pos;
    const text = state.src;

    if (text.charCodeAt(start) !== CHAR_CARET) return false;
    if (text.charCodeAt(start + 1) !== CHAR_OPEN_BRACKET2) return false;
    if (shouldDeferInlineRule(state, silent)) return false;

    const contentStart = start + 2;
    const closeAt = text.indexOf("]", contentStart);
    if (closeAt < 0) return false;

    const inner = text.slice(contentStart, closeAt);
    if (inner.includes("\n")) return false;

    pushChipTokens(state, extractClaimCitations(`^[${inner}]`), context);
    state.pos = closeAt + 1;
    return true;
  };
}
|
|
1709
|
+
/**
 * Push one "citation" token per span of every citation, each carrying the
 * chip metadata built for that span.
 *
 * @param {object} state - Inline state exposing `push`.
 * @param {Array<{spans: Array<object>}>} citations - Extracted citations.
 * @param {object} context - Forwarded to `buildChipMeta`.
 * @returns {void}
 */
function pushChipTokens(state, citations, context) {
  citations
    .flatMap((citation) => citation.spans)
    .forEach((span) => {
      const chip = state.push("citation", "", 0);
      chip.meta = buildChipMeta(span, context);
    });
}
|
|
1717
|
+
/**
 * Build the metadata for a citation chip. `resolved` reports whether the
 * cited file is among the known source files. The absolute path and editor
 * link are added only when the context is loopback, the file resolved, and
 * the file is a bare filename.
 *
 * @param {{file: string, lines?: {start: number, end: number}}} span
 * @param {{root: string, sourceFiles: Set<string>, isLoopback: boolean}} context
 * @returns {object} Chip metadata (file, lineStart, lineEnd, resolved and,
 *   when applicable, absolutePath and editorHref).
 */
function buildChipMeta(span, context) {
  const { file, lines } = span;
  const resolved = context.sourceFiles.has(file);
  const meta = {
    file,
    lineStart: lines?.start,
    lineEnd: lines?.end,
    resolved,
  };

  const canLinkLocally = context.isLoopback && resolved && isBareFilename(file);
  if (!canLinkLocally) {
    return meta;
  }

  const absolutePath = path18.join(context.root, "sources", file);
  meta.absolutePath = absolutePath;
  meta.editorHref = buildEditorHref(absolutePath, meta.lineStart);
  return meta;
}
|
|
1731
|
+
/**
 * Build a `vscode://file…` link for an absolute path, with `:<line>`
 * appended when a start line is given.
 *
 * @param {string} absolutePath - Absolute filesystem path.
 * @param {number} [lineStart] - Line to append, if any.
 * @returns {string} The editor link.
 */
function buildEditorHref(absolutePath, lineStart) {
  // Going through a file URL percent-encodes the path.
  const { pathname } = pathToFileURL(absolutePath);
  const lineSuffix = lineStart === undefined ? "" : `:${lineStart}`;
  return `vscode://file${pathname}${lineSuffix}`;
}
|
|
1736
|
+
/**
 * Check that a cited file is a plain filename: non-empty, free of `/`, `\`
 * and NUL characters, and not "." or "..".
 *
 * @param {string} file - Cited file name.
 * @returns {boolean}
 */
function isBareFilename(file) {
  if (file.length === 0) {
    return false;
  }
  const forbidden = ["/", "\\", "\0"];
  if (forbidden.some((ch) => file.includes(ch))) {
    return false;
  }
  return file !== "." && file !== "..";
}
|
|
1742
|
+
/**
 * Render a citation token as a `<span>` chip: attributes come from
 * `chipAttributes`, the text is the escaped chip label.
 *
 * @param {{meta: object}} token - Citation token.
 * @returns {string} HTML fragment.
 */
function renderCitationToken(token) {
  const { meta } = token;
  const text = formatChipLabel(meta);
  const attributes = chipAttributes(meta);
  return `<span ${attributes}>${escapeHtml(text)}</span>`;
}
|
|
1748
|
+
/**
 * Build the attribute string for a citation chip. Class, data-file and
 * data-resolved are always present; the line and path attributes appear
 * only when the corresponding metadata is defined.
 *
 * @param {object} meta - Chip metadata.
 * @returns {string} Space-separated HTML attributes.
 */
function chipAttributes(meta) {
  const resolvedFlag = meta.resolved ? "true" : "false";
  const attrs = [
    `class="citation-chip"`,
    `data-file="${escapeHtml(meta.file)}"`,
    `data-resolved="${resolvedFlag}"`,
  ];

  const { lineStart, lineEnd, absolutePath, editorHref } = meta;
  if (lineStart !== undefined) attrs.push(`data-line-start="${lineStart}"`);
  if (lineEnd !== undefined) attrs.push(`data-line-end="${lineEnd}"`);
  if (absolutePath !== undefined) {
    attrs.push(`data-absolute-path="${escapeHtml(absolutePath)}"`);
  }
  if (editorHref !== undefined) {
    attrs.push(`data-editor-href="${escapeHtml(editorHref)}"`);
  }
  return attrs.join(" ");
}
|
|
1768
|
+
/**
 * Format the visible label of a citation chip: `file`, `file:line`, or
 * `file:start-end` when the end line is defined and differs from the start.
 *
 * @param {{file: string, lineStart?: number, lineEnd?: number}} meta
 * @returns {string}
 */
function formatChipLabel(meta) {
  const { file, lineStart, lineEnd } = meta;
  if (lineStart === undefined) {
    return file;
  }
  const isRange = lineEnd !== undefined && lineEnd !== lineStart;
  return isRange ? `${file}:${lineStart}-${lineEnd}` : `${file}:${lineStart}`;
}
|
|
1775
|
+
|
|
1776
|
+
// src/viewer/render.ts
|
|
1777
|
+
/**
 * Render a page body to HTML: Markdown is rendered with a renderer
 * configured for the snapshot, then passed through the sanitizer policy
 * built for the options.
 *
 * @param {string} body - Markdown body.
 * @param {object} snapshot - Wiki snapshot used to configure the renderer.
 * @param {object} options - Render options.
 * @returns {{html: string}} The sanitized HTML.
 */
function renderPageHtml(body, snapshot, options) {
  const unsafeHtml = buildMarkdownIt(snapshot, options).render(body);
  const policy = buildSanitizerPolicy(options);
  return { html: sanitizeHtml(unsafeHtml, policy) };
}
|
|
1783
|
+
/**
 * Create a markdown-it instance with raw HTML, linkify and hard breaks all
 * disabled, and the wikilink and citation rules registered.
 *
 * @param {{pages: Array<object>, root: string, sourceFilenames: Iterable<string>}} snapshot
 * @param {{isLoopback: boolean}} options
 * @returns {object} The configured markdown-it instance.
 */
function buildMarkdownIt(snapshot, options) {
  const settings = { html: false, linkify: false, breaks: false };
  const md = new MarkdownIt(settings);

  const wikilinkContext = { pages: snapshot.pages };
  registerWikilink(md, wikilinkContext);

  const citationContext = {
    root: snapshot.root,
    sourceFiles: new Set(snapshot.sourceFilenames),
    isLoopback: options.isLoopback,
  };
  registerCitation(md, citationContext);

  return md;
}
|
|
1797
|
+
/**
 * Build the sanitize-html options applied to rendered page HTML: an
 * allowlist of tags and per-tag attributes, the permitted URL schemes, and
 * the tag transforms for anchors, images and spans.
 *
 * @param {{isLoopback: boolean}} options - Forwarded to the span transform.
 * @returns {object} sanitize-html options object.
 */
function buildSanitizerPolicy(options) {
  const headingTags = ["h1", "h2", "h3", "h4", "h5", "h6"];
  const blockTags = ["p", "br", "hr", "ul", "ol", "li", "blockquote"];
  const inlineTags = ["strong", "em", "b", "i", "s", "u", "code", "pre"];
  const tableTags = ["table", "thead", "tbody", "tfoot", "tr", "th", "td"];
  const otherTags = ["a", "img", "span", "div"];

  const allowedAttributes = {
    a: ["href", "title", "class", "id", "data-*", "aria-*"],
    img: ["src", "alt", "title", "class", "id"],
    span: ["class", "id", "data-*", "aria-*"],
    div: ["class", "id", "data-*", "aria-*"],
    th: ["scope", "colspan", "rowspan", "class", "id"],
    td: ["colspan", "rowspan", "class", "id"],
    table: ["class", "id"],
    code: ["class"],
    "*": ["class", "id"],
  };

  const allowedSchemesByTag = {
    a: buildAnchorSchemes(),
    img: ["http", "https", "data"],
  };

  // sanitize-html's URL filter does not enforce hash-only links by
  // default; the anchor transform whitelists `#/…` explicitly.
  const transformTags = {
    a: filterAnchorHref(),
    img: filterImgSrc,
    span: filterSpanForLanBind(options),
  };

  return {
    allowedTags: [...headingTags, ...blockTags, ...inlineTags, ...tableTags, ...otherTags],
    disallowedTagsMode: "discard",
    allowedAttributes,
    allowedSchemes: ["http", "https", "mailto"],
    allowedSchemesByTag,
    allowedSchemesAppliedToAttributes: ["href", "src", "cite"],
    allowProtocolRelative: false,
    // `allowedAttributes` above whitelists `class` everywhere via `*`,
    // so no further class-name allowlist is needed; leaving
    // `allowedClasses` unset lets every class value through.
    allowedStyles: {},
    allowedIframeHostnames: [],
    transformTags,
  };
}
|
|
1868
|
+
/**
 * Create the `<span>` transform. When the options are not loopback, the
 * `data-absolute-path` and `data-editor-href` attributes are removed;
 * otherwise, and for spans carrying neither attribute, the attributes are
 * passed through untouched.
 *
 * @param {{isLoopback: boolean}} options
 * @returns {(tagName: string, attribs: object) => {tagName: string, attribs: object}}
 */
function filterSpanForLanBind(options) {
  const localOnly = ["data-absolute-path", "data-editor-href"];
  return function transformSpan(tagName, attribs) {
    if (options.isLoopback) {
      return { tagName, attribs };
    }
    const carriesLocalData = localOnly.some((name) => name in attribs);
    if (!carriesLocalData) {
      return { tagName, attribs };
    }
    const kept = Object.entries(attribs).filter(([key]) => !localOnly.includes(key));
    return { tagName, attribs: Object.fromEntries(kept) };
  };
}
|
|
1882
|
+
/**
 * URL schemes permitted on anchor hrefs.
 *
 * @returns {string[]} A fresh array on every call.
 */
function buildAnchorSchemes() {
  const schemes = [];
  schemes.push("http", "https", "mailto");
  return schemes;
}
|
|
1885
|
+
/**
 * Create the `<a>` transform: an href that `isAllowedAnchorHref` rejects is
 * removed while the anchor and its other attributes are kept. Anchors with
 * no href, or an empty one, pass through unchanged.
 *
 * @returns {(tagName: string, attribs: object) => {tagName: string, attribs: object}}
 */
function filterAnchorHref() {
  return function transformAnchor(tagName, attribs) {
    const { href, ...withoutHref } = attribs;
    const hasHref = typeof href === "string" && href.length > 0;
    if (!hasHref || isAllowedAnchorHref(href)) {
      return { tagName, attribs };
    }
    return { tagName, attribs: withoutHref };
  };
}
|
|
1895
|
+
/**
 * `<img>` transform: a src that `isAllowedImgSrc` rejects is removed while
 * the image and its other attributes are kept. Images with no src, or an
 * empty one, pass through unchanged.
 *
 * @param {string} tagName
 * @param {object} attribs
 * @returns {{tagName: string, attribs: object}}
 */
function filterImgSrc(tagName, attribs) {
  const { src, ...withoutSrc } = attribs;
  const hasSrc = typeof src === "string" && src.length > 0;
  if (!hasSrc || isAllowedImgSrc(src)) {
    return { tagName, attribs };
  }
  return { tagName, attribs: withoutSrc };
}
|
|
1903
|
+
/**
 * Accept hrefs that are in-page fragments (`#…`) or start with `http://`,
 * `https://` or `mailto:`. The comparison is case-sensitive.
 *
 * @param {string} href
 * @returns {boolean}
 */
function isAllowedAnchorHref(href) {
  const allowedPrefixes = ["#", "http://", "https://", "mailto:"];
  return allowedPrefixes.some((prefix) => href.startsWith(prefix));
}
|
|
1909
|
+
/**
 * Accept image sources that start with `http://`, `https://` or
 * `data:image/`. The comparison is case-sensitive.
 *
 * @param {string} src
 * @returns {boolean}
 */
function isAllowedImgSrc(src) {
  const allowedPrefixes = ["http://", "https://", "data:image/"];
  return allowedPrefixes.some((prefix) => src.startsWith(prefix));
}
|
|
1914
|
+
|
|
1915
|
+
// src/viewer/search.ts
|
|
1916
|
+
var MAX_QUERY_LENGTH = 200;
|
|
1917
|
+
var MAX_RESULTS = 50;
|
|
1918
|
+
var SNIPPET_RADIUS = 60;
|
|
1919
|
+
var SNIPPET_ELLIPSIS = "\u2026";
|
|
1920
|
+
/**
 * Search the snapshot's pages for a free-text query.
 *
 * @param {{pages: Array<object>}} snapshot
 * @param {*} rawQuery - Query text as received.
 * @returns {{results: Array<object>}} At most MAX_RESULTS matches ordered by
 *   `compareResults`; empty when the query yields no tokens.
 */
function searchPages(snapshot, rawQuery) {
  const tokens = tokenizeQuery(rawQuery);
  if (tokens.length === 0) {
    return { results: [] };
  }
  const ranked = collectMatches(snapshot.pages, tokens).sort(compareResults);
  return { results: ranked.slice(0, MAX_RESULTS) };
}
|
|
1927
|
+
/**
 * Normalise a raw query into lowercase, whitespace-separated tokens. The
 * trimmed query is cut to MAX_QUERY_LENGTH characters before splitting.
 *
 * @param {*} rawQuery - Query as received; non-strings produce no tokens.
 * @returns {string[]} Non-empty lowercase tokens.
 */
function tokenizeQuery(rawQuery) {
  if (typeof rawQuery !== "string") {
    return [];
  }
  const normalized = rawQuery.trim();
  if (normalized === "") {
    return [];
  }
  return normalized
    .slice(0, MAX_QUERY_LENGTH)
    .toLowerCase()
    .split(/\s+/)
    .filter((token) => token !== "");
}
|
|
1934
|
+
/**
 * Collects one result row for every page that matchPage accepts.
 *
 * @param {Array} pages - Pages to test, in snapshot order.
 * @param {string[]} tokens - Lower-cased query tokens.
 * @returns {Array} Result rows in page order; pages without a match are
 *   omitted.
 */
function collectMatches(pages, tokens) {
  const rows = [];
  for (let i = 0; i < pages.length; i += 1) {
    const row = matchPage(pages[i], tokens);
    if (row) {
      rows.push(row);
    }
  }
  return rows;
}
|
|
1942
|
+
/**
 * Matches one page against all query tokens.
 *
 * A page matches only when every token occurs in its title or its body
 * (case-insensitive). When all tokens occur in the title the row is a
 * "title" match with the title as snippet; otherwise it is a "body" match
 * with a snippet cut from the body.
 *
 * @param {{title: string, body: string}} page - Page to test.
 * @param {string[]} tokens - Lower-cased query tokens.
 * @returns {Object|null} Result row, or null when some token is missing.
 */
function matchPage(page, tokens) {
  const titleLower = page.title.toLowerCase();
  const bodyLower = page.body.toLowerCase();
  let everyTokenInTitle = true;
  for (const token of tokens) {
    if (titleLower.includes(token)) continue;
    // Not in the title: the token must at least be in the body.
    if (!bodyLower.includes(token)) return null;
    everyTokenInTitle = false;
  }
  if (everyTokenInTitle) {
    return rowFromPage(page, page.title, "title");
  }
  return rowFromPage(page, buildBodySnippet(page.body, bodyLower, tokens), "body");
}
|
|
1953
|
+
/**
 * Builds a search result row from a page.
 *
 * @param {{id: *, pageDirectory: *, title: *}} page - Matched page.
 * @param {string} snippet - Text shown for the match.
 * @param {string} matchedIn - Where the match was found ("title" or "body").
 * @returns {{id: *, pageDirectory: *, title: *, snippet: string, matchedIn: string}}
 */
function rowFromPage(page, snippet, matchedIn) {
  const { id, pageDirectory, title } = page;
  return { id, pageDirectory, title, snippet, matchedIn };
}
|
|
1962
|
+
/**
 * Cuts a short, single-line snippet out of a page body around the earliest
 * token match.
 *
 * The window spans SNIPPET_RADIUS characters on each side of the match
 * position. Inline markdown noise is stripped and whitespace collapsed, and
 * SNIPPET_ELLIPSIS marks each edge where the body continues.
 *
 * @param {string} body - Original page body.
 * @param {string} bodyLower - Lower-cased body used to locate tokens.
 * @param {string[]} tokens - Lower-cased query tokens.
 * @returns {string} The snippet text.
 */
function buildBodySnippet(body, bodyLower, tokens) {
  const anchor = earliestTokenPosition(bodyLower, tokens);
  const from = Math.max(0, anchor - SNIPPET_RADIUS);
  const to = Math.min(body.length, anchor + SNIPPET_RADIUS);
  const excerpt = stripInlineMarkdownNoise(body.slice(from, to)).replace(/\s+/g, " ").trim();
  const pieces = [];
  if (from > 0) pieces.push(SNIPPET_ELLIPSIS);
  pieces.push(excerpt);
  if (to < body.length) pieces.push(SNIPPET_ELLIPSIS);
  return pieces.join("");
}
|
|
1971
|
+
/**
 * Removes inline markdown syntax from a text fragment and keeps the visible
 * text.
 *
 * The rules run in the listed order; each later rule sees the output of the
 * earlier ones.
 *
 * @param {string} text - Markdown fragment.
 * @returns {string} Fragment with the inline markup removed.
 */
function stripInlineMarkdownNoise(text) {
  const rules = [
    [/!\[([^\]]*)\]\([^)]*\)/g, "$1"], // image -> alt text
    [/\[([^\]]+)\]\([^)]*\)/g, "$1"], // link -> label
    [/\[\[([^\]|\n]+)\|([^\]\n]+)\]\]/g, "$2"], // [[target|label]] -> label
    [/\[\[([^\]\n]+)\]\]/g, "$1"], // [[target]] -> target
    [/\*\*([^*]+)\*\*/g, "$1"], // **bold**
    [/__([^_]+)__/g, "$1"], // __bold__
    [/(?<!\w)\*([^*\n]+)\*(?!\w)/g, "$1"], // *emphasis*
    [/(?<!\w)_([^_\n]+)_(?!\w)/g, "$1"], // _emphasis_
    [/`([^`\n]+)`/g, "$1"], // `code`
    [/~~([^~\n]+)~~/g, "$1"], // ~~strikethrough~~
  ];
  let cleaned = text;
  for (const [pattern, replacement] of rules) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned;
}
|
|
1974
|
+
/**
 * Finds the smallest index at which any token first occurs in the body.
 *
 * @param {string} bodyLower - Lower-cased body text.
 * @param {string[]} tokens - Lower-cased query tokens.
 * @returns {number} Earliest match index, or `bodyLower.length` when no
 *   token occurs.
 */
function earliestTokenPosition(bodyLower, tokens) {
  let best = bodyLower.length;
  for (let i = 0; i < tokens.length; i += 1) {
    const at = bodyLower.indexOf(tokens[i]);
    if (at >= 0 && at < best) {
      best = at;
    }
  }
  return best;
}
|
|
1982
|
+
/**
 * Sort comparator for search rows: title matches come before body matches,
 * and rows of the same kind are ordered by title via localeCompare.
 *
 * @param {{matchedIn: string, title: string}} a - First row.
 * @param {{matchedIn: string, title: string}} b - Second row.
 * @returns {number} Negative when `a` sorts first, positive when `b` does.
 */
function compareResults(a, b) {
  if (a.matchedIn === b.matchedIn) {
    return a.title.localeCompare(b.title);
  }
  return a.matchedIn === "title" ? -1 : 1;
}
|
|
1988
|
+
|
|
1989
|
+
// src/viewer/server.ts
|
|
1990
|
+
// Bind hosts treated as loopback; handleRequest passes the membership result
// to the route handlers as their `isLoopback` flag.
var LOOPBACK_HOSTS = /* @__PURE__ */ new Set(["127.0.0.1", "::1"]);
|
|
1991
|
+
// Content-Security-Policy header value that applySecurityHeaders sets on every response.
var CONTENT_SECURITY_POLICY = "default-src 'self'; script-src 'self'; style-src 'self'; img-src 'self' data:; font-src 'self'; connect-src 'self'; frame-ancestors 'none'; base-uri 'none'; object-src 'none'; form-action 'none'";
|
|
1992
|
+
/**
 * Starts the viewer HTTP server and resolves once it is listening.
 *
 * @param {Object} snapshot - Viewer snapshot served by the request handlers.
 * @param {{host: string, port: number}} config - Bind address; a port of 0
 *   lets the OS choose one.
 * @returns {Promise<{host: string, port: number, close: function(): Promise<void>}>}
 *   The configured host, the actually bound port and a function that closes
 *   the server.
 * @throws {Error} When listening fails or the bound address is unavailable.
 */
async function startViewerServer(snapshot, config) {
  // Work on a copy: the bound port is recorded below and request validation
  // must see it, without mutating the caller's config object.
  const boundConfig = { ...config };
  const server = http.createServer((req, res) => {
    handleRequest(req, res, snapshot, boundConfig).catch((err) => {
      void err;
      if (res.headersSent) return;
      writeJsonError(res, 500, "internal_error", "Unexpected server error.");
    });
  });
  await new Promise((resolve, reject) => {
    // Whichever of the two events fires first removes the other listener.
    function handleListening() {
      server.off("error", handleError);
      resolve();
    }
    function handleError(err) {
      server.off("listening", handleListening);
      reject(err);
    }
    server.once("error", handleError);
    server.once("listening", handleListening);
    server.listen(config.port, config.host);
  });
  const bound = server.address();
  if (!bound) throw new Error("server bound but address is null");
  const { port } = bound;
  boundConfig.port = port;
  return {
    host: config.host,
    port,
    close: () => new Promise((done) => server.close(() => done()))
  };
}
|
|
2024
|
+
/**
 * Entry point for every HTTP request: sets the security headers, enforces
 * the origin policy, then dispatches registered routes.
 *
 * Responds 403 when the origin policy rejects the request and 404 when the
 * method/path pair is not a registered route.
 *
 * @param {Object} req - Incoming request.
 * @param {Object} res - Server response.
 * @param {Object} snapshot - Viewer snapshot.
 * @param {{host: string, port: number}} config - Bound server configuration.
 * @returns {Promise<void>}
 */
async function handleRequest(req, res, snapshot, config) {
  applySecurityHeaders(res);
  const originAllowed = validateOriginHeaders(req, config);
  if (!originAllowed) {
    writeJsonError(res, 403, "forbidden", "rejected by origin policy");
    return;
  }
  const url = new URL(req.url ?? "/", buildOriginBase(config));
  if (isRouteRegistered(req.method, url.pathname)) {
    await routeRegistered(req, res, url, snapshot, LOOPBACK_HOSTS.has(config.host));
    return;
  }
  writeJsonError(res, 404, "not_found", `${req.method ?? "?"} ${url.pathname}`);
}
|
|
2037
|
+
/**
 * Dispatches a request whose route already passed isRouteRegistered to its
 * handler.
 *
 * @param {Object} req - Incoming request (not used by the dispatch itself).
 * @param {Object} res - Server response.
 * @param {URL} parsedUrl - Parsed request URL.
 * @param {Object} snapshot - Viewer snapshot.
 * @param {boolean} isLoopback - Whether the server is bound to a loopback host.
 * @returns {Promise<*>} Whatever the selected handler returns.
 * @throws {Error} When no handler exists for the path, which means this
 *   table and isRouteRegistered have drifted apart.
 */
async function routeRegistered(req, res, parsedUrl, snapshot, isLoopback) {
  const { pathname } = parsedUrl;
  switch (pathname) {
    case "/":
      return handleShell(res, snapshot);
    case "/api/pages":
      return handleApiPages(res, snapshot);
    case "/api/index":
      return handleApiIndex(res, snapshot, isLoopback);
    case "/api/health":
      return handleApiHealth(res, snapshot);
    case "/api/search":
      return handleApiSearch(res, parsedUrl, snapshot);
    default:
      break;
  }
  // Prefix routes; none of the exact paths above starts with these prefixes.
  if (pathname.startsWith("/assets/")) {
    return handleAsset(res, pathname);
  }
  if (pathname.startsWith("/api/page/")) {
    return handleApiPage(res, pathname, snapshot, isLoopback);
  }
  throw new Error(`route registration drift: no handler for ${pathname}`);
}
|
|
2049
|
+
/**
 * Tells whether a method/path pair is served by the viewer.
 *
 * Only GET is accepted. The path must be one of the exact routes or start
 * with `/assets/` or `/api/page/`.
 *
 * @param {string|undefined} method - HTTP method of the request.
 * @param {string} pathname - URL path of the request.
 * @returns {boolean} True when the route is registered.
 */
function isRouteRegistered(method, pathname) {
  if (method !== "GET") return false;
  const exactRoutes = ["/", "/api/pages", "/api/index", "/api/health", "/api/search"];
  if (exactRoutes.includes(pathname)) return true;
  const prefixRoutes = ["/assets/", "/api/page/"];
  return prefixRoutes.some((prefix) => pathname.startsWith(prefix));
}
|
|
2060
|
+
/**
 * Sets the fixed security headers on a response.
 *
 * @param {Object} res - Server response; only `setHeader` is used.
 * @returns {void}
 */
function applySecurityHeaders(res) {
  const headers = [
    ["Content-Security-Policy", CONTENT_SECURITY_POLICY],
    ["Cross-Origin-Resource-Policy", "same-origin"],
    ["X-Content-Type-Options", "nosniff"],
    ["Referrer-Policy", "no-referrer"]
  ];
  for (const [name, value] of headers) {
    res.setHeader(name, value);
  }
}
|
|
2066
|
+
/**
 * Applies the origin policy to a request.
 *
 * The request passes when its Host header is acceptable for the bound
 * config, its Origin header (if present and non-empty) is same-origin, and
 * `Sec-Fetch-Site` is not "cross-site".
 *
 * @param {{headers: Object}} req - Incoming request.
 * @param {{host: string, port: number}} config - Bound server configuration.
 * @returns {boolean} True when the request may be served.
 */
function validateOriginHeaders(req, config) {
  const { headers } = req;
  const hostHeader = headers.host;
  if (!hostHeader || !isAcceptableHost(hostHeader, config)) return false;
  const originHeader = headers.origin;
  const hasOrigin = typeof originHeader === "string" && originHeader.length > 0;
  if (hasOrigin && !isSameOrigin(originHeader, config)) return false;
  return headers["sec-fetch-site"] !== "cross-site";
}
|
|
2077
|
+
/**
 * Checks a Host header against the values this server accepts.
 *
 * @param {string} hostHeader - Raw Host header of the request.
 * @param {{host: string, port: number}} config - Bound server configuration.
 * @returns {boolean} True when the header equals one of the accepted values.
 */
function isAcceptableHost(hostHeader, config) {
  return buildAcceptableHostHeaders(config).includes(hostHeader);
}

/**
 * Lists the Host header values accepted for the bound address.
 *
 * Always contains `host:port` (IPv6 hosts bracketed). For the loopback binds
 * 127.0.0.1 and ::1 it also contains `localhost:port`. When the server is
 * bound to port 80 the port-less forms are accepted too, because clients
 * leave the default HTTP port out of the Host header; without them every
 * browser request to a port-80 viewer would be rejected.
 *
 * @param {{host: string, port: number}} config - Bound server configuration.
 * @returns {string[]} Accepted Host header values.
 */
function buildAcceptableHostHeaders(config) {
  const isLoopbackBind = config.host === "127.0.0.1" || config.host === "::1";
  const accepted = [formatHostHeader(config.host, config.port)];
  if (isLoopbackBind) {
    accepted.push(`localhost:${config.port}`);
  }
  if (config.port === 80) {
    accepted.push(config.host.includes(":") ? `[${config.host}]` : config.host);
    if (isLoopbackBind) {
      accepted.push("localhost");
    }
  }
  return accepted;
}

/**
 * Checks whether an Origin header names the bound host and port.
 *
 * The URL parser reports an empty `port` when the origin uses its scheme's
 * default port, so the effective port is derived from the scheme in that
 * case (80 for http, 443 for https) instead of being read as 0.
 *
 * @param {string} origin - Raw Origin header value.
 * @param {{host: string, port: number}} config - Bound server configuration.
 * @returns {boolean} True when hostname and effective port both match;
 *   false for unparsable origins or schemes without a known default port.
 */
function isSameOrigin(origin, config) {
  try {
    const parsed = new URL(origin);
    const expectedHostname = normalizeHostnameForOrigin(config.host);
    const originHostname = normalizeHostnameForOrigin(parsed.hostname);
    if (originHostname !== expectedHostname) return false;
    let originPort;
    if (parsed.port !== "") {
      originPort = Number(parsed.port);
    } else if (parsed.protocol === "http:") {
      originPort = 80;
    } else if (parsed.protocol === "https:") {
      originPort = 443;
    } else {
      return false;
    }
    return originPort === config.port;
  } catch {
    // Unparsable origin (for example the literal "null") is never same-origin.
    return false;
  }
}

/**
 * Formats a host and port the way they appear in a Host header.
 *
 * @param {string} host - Hostname or IP literal; a colon marks it as IPv6.
 * @param {number} port - Port number.
 * @returns {string} `host:port`, with IPv6 hosts wrapped in brackets.
 */
function formatHostHeader(host, port) {
  if (host.includes(":")) return `[${host}]:${port}`;
  return `${host}:${port}`;
}

/**
 * Builds the `http://host:port` base used to resolve request URLs.
 *
 * @param {{host: string, port: number}} config - Bound server configuration.
 * @returns {string} Base URL, with IPv6 hosts wrapped in brackets.
 */
function buildOriginBase(config) {
  if (config.host.includes(":")) return `http://[${config.host}]:${config.port}`;
  return `http://${config.host}:${config.port}`;
}

/**
 * Normalizes a hostname for comparison: lower-cases it and strips the
 * square brackets of an IPv6 literal.
 *
 * @param {string} host - Hostname as configured or as parsed from a URL.
 * @returns {string} Normalized hostname.
 */
function normalizeHostnameForOrigin(host) {
  let h = host.toLowerCase();
  if (h.startsWith("[") && h.endsWith("]")) h = h.slice(1, -1);
  return h;
}
|
|
2114
|
+
/**
 * Serves the viewer's HTML shell for `/`.
 *
 * Responds 500 with code "shell_missing" when the shell template cannot be
 * loaded; otherwise sends the template with the page index substituted in.
 *
 * @param {Object} res - Server response.
 * @param {{pages: Array}} snapshot - Viewer snapshot.
 * @returns {Promise<void>}
 */
async function handleShell(res, snapshot) {
  const template = await loadShellTemplate(ASSETS_DIR);
  if (template !== null) {
    const html = substitutePageIndex(template, snapshot.pages);
    res.statusCode = 200;
    res.setHeader("Content-Type", "text/html; charset=utf-8");
    res.end(html);
    return;
  }
  writeJsonError(res, 500, "shell_missing", "Viewer shell template not found on disk.");
}
|
|
2125
|
+
/**
 * Serves `/api/pages`: project info, counts, index availability, recent
 * pages and one list row per page.
 *
 * @param {Object} res - Server response.
 * @param {Object} snapshot - Viewer snapshot.
 * @returns {void}
 */
function handleApiPages(res, snapshot) {
  const { concepts, queries, sourceFiles, pendingReviews } = snapshot.counts;
  const { available, href } = snapshot.index;
  const payload = {
    project: snapshot.project,
    // Only these four counters are exposed by this endpoint.
    counts: { concepts, queries, sourceFiles, pendingReviews },
    index: { available, href },
    recentPages: snapshot.recentPages,
    pages: snapshot.pages.map(pageListRow),
    updatedAt: snapshot.generatedAt
  };
  writeJson(res, 200, payload);
}
|
|
2140
|
+
/**
 * Builds the list row that `/api/pages` returns for one page.
 *
 * Frontmatter values are used only when they are strings; otherwise `kind`
 * falls back to "concept" and `summary`/`updatedAt` to an empty string.
 *
 * @param {Object} page - Snapshot page.
 * @returns {Object} Row with id, pageDirectory, slug, title, kind, summary,
 *   updatedAt and warnings.
 */
function pageListRow(page) {
  const { frontmatter } = page;
  const textOr = (value, fallback) => (typeof value === "string" ? value : fallback);
  return {
    id: page.id,
    pageDirectory: page.pageDirectory,
    slug: page.slug,
    title: page.title,
    kind: textOr(frontmatter.kind, "concept"),
    summary: textOr(frontmatter.summary, ""),
    updatedAt: textOr(frontmatter.updatedAt, ""),
    warnings: page.warnings
  };
}
|
|
2152
|
+
/**
 * Serves `/api/index`: the rendered wiki index.
 *
 * Responds 404 with code "index_unavailable" when the snapshot has no
 * index, and with the render-failed error when rendering fails.
 *
 * @param {Object} res - Server response.
 * @param {Object} snapshot - Viewer snapshot.
 * @param {boolean} isLoopback - Forwarded to the renderer.
 * @returns {void}
 */
function handleApiIndex(res, snapshot, isLoopback) {
  const { index } = snapshot;
  if (!index.available) {
    writeJsonError(res, 404, "index_unavailable", "wiki/index.md is not present.");
    return;
  }
  const rendered = tryRenderBody(index.body, snapshot, isLoopback);
  if (rendered !== null) {
    writeJson(res, 200, {
      html: rendered.html,
      outgoingLinks: index.outgoingLinks,
      generatedAt: snapshot.generatedAt
    });
    return;
  }
  writeRenderFailed(res);
}
|
|
2168
|
+
/**
 * Serves `/api/health` with the health response built from the snapshot.
 *
 * @param {Object} res - Server response.
 * @param {Object} snapshot - Viewer snapshot.
 * @returns {Promise<void>}
 */
async function handleApiHealth(res, snapshot) {
  writeJson(res, 200, await buildHealthResponse(snapshot));
}
|
|
2172
|
+
/**
 * Serves `/api/search`: runs the `q` query parameter against the snapshot.
 * A missing `q` is treated as an empty query.
 *
 * @param {Object} res - Server response.
 * @param {URL} parsedUrl - Parsed request URL.
 * @param {Object} snapshot - Viewer snapshot.
 * @returns {void}
 */
function handleApiSearch(res, parsedUrl, snapshot) {
  const q = parsedUrl.searchParams.get("q");
  const results = searchPages(snapshot, q ?? "");
  writeJson(res, 200, results);
}
|
|
2176
|
+
/**
 * Serves `/api/page/:directory/:slug`: the rendered HTML and metadata of
 * one page.
 *
 * Responds 400 for a malformed path or an invalid directory/slug, 404 when
 * no page matches, and with the render-failed error when rendering fails.
 *
 * @param {Object} res - Server response.
 * @param {string} pathname - Request path starting with `/api/page/`.
 * @param {Object} snapshot - Viewer snapshot.
 * @param {boolean} isLoopback - Forwarded to the renderer.
 * @returns {void}
 */
function handleApiPage(res, pathname, snapshot, isLoopback) {
  const parts = pathname.replace(/^\/api\/page\//, "").split("/");
  if (parts.length !== 2) {
    writeJsonError(res, 400, "bad_request", "Expected /api/page/:directory/:slug");
    return;
  }
  const target = safeDecodeSlug(parts[0], parts[1]);
  if (!target) {
    writeJsonError(res, 400, "bad_request", "Invalid directory or slug.");
    return;
  }
  const { directory, slug } = target;
  const page = snapshot.pages.find(
    (candidate) => candidate.pageDirectory === directory && candidate.slug === slug
  );
  if (!page) {
    writeJsonError(res, 404, "page_not_found", `${directory}/${slug}`);
    return;
  }
  const rendered = tryRenderBody(page.body, snapshot, isLoopback);
  if (rendered !== null) {
    writeJson(res, 200, pagePayload(page, snapshot, rendered.html));
    return;
  }
  writeRenderFailed(res);
}
|
|
2202
|
+
/**
 * Validates the two path segments of a page request.
 *
 * The directory must be "concepts" or "queries". The slug is URI-decoded
 * and must pass assertSafeSlug.
 *
 * @param {string} directorySegment - First path segment.
 * @param {string} encodedSlug - Second path segment, still URI-encoded.
 * @returns {{directory: string, slug: string}|null} Decoded target, or null
 *   when the directory is unknown, decoding fails or the slug is unsafe.
 * @throws {*} Any error from assertSafeSlug that is not a PathSafetyError.
 */
function safeDecodeSlug(directorySegment, encodedSlug) {
  const knownDirectories = ["concepts", "queries"];
  if (!knownDirectories.includes(directorySegment)) return null;
  let slug;
  try {
    slug = decodeURIComponent(encodedSlug);
  } catch {
    // Malformed percent-encoding.
    return null;
  }
  try {
    assertSafeSlug(slug);
  } catch (err) {
    if (!(err instanceof PathSafetyError)) throw err;
    return null;
  }
  return { directory: directorySegment, slug };
}
|
|
2218
|
+
/**
 * Builds the JSON payload returned for a single page.
 *
 * `updatedAt` and `createdAt` come from the frontmatter when they are
 * strings and are empty strings otherwise.
 *
 * @param {Object} page - Snapshot page.
 * @param {{generatedAt: string}} snapshot - Viewer snapshot.
 * @param {string} renderedHtml - Rendered HTML of the page body.
 * @returns {Object} The page payload.
 */
function pagePayload(page, snapshot, renderedHtml) {
  const { frontmatter } = page;
  const textOrEmpty = (value) => (typeof value === "string" ? value : "");
  return {
    id: page.id,
    title: page.title,
    pageDirectory: page.pageDirectory,
    slug: page.slug,
    html: renderedHtml,
    citations: page.citations,
    outgoingLinks: page.outgoingLinks,
    frontmatter,
    warnings: page.warnings,
    updatedAt: textOrEmpty(frontmatter.updatedAt),
    createdAt: textOrEmpty(frontmatter.createdAt),
    generatedAt: snapshot.generatedAt
  };
}
|
|
2234
|
+
/**
 * Renders a markdown body to HTML, turning any rendering error into null.
 *
 * @param {string} body - Markdown body to render.
 * @param {Object} snapshot - Viewer snapshot passed to the renderer.
 * @param {boolean} isLoopback - Passed to the renderer as an option.
 * @returns {Object|null} The renderer's result, or null when it threw.
 */
function tryRenderBody(body, snapshot, isLoopback) {
  let rendered = null;
  try {
    rendered = renderPageHtml(body, snapshot, { isLoopback });
  } catch {
    rendered = null;
  }
  return rendered;
}
|
|
2241
|
+
/**
 * Sends the 500 "render_failed" error response.
 *
 * @param {Object} res - Server response.
 * @returns {void}
 */
function writeRenderFailed(res) {
  const code = "render_failed";
  const message = "Could not render page.";
  writeJsonError(res, 500, code, message);
}
|
|
2244
|
+
/**
 * Sends a JSON response.
 *
 * @param {Object} res - Server response.
 * @param {number} status2 - HTTP status code.
 * @param {*} body - Value serialized with JSON.stringify.
 * @returns {void}
 */
function writeJson(res, status2, body) {
  res.statusCode = status2;
  res.setHeader("Content-Type", "application/json; charset=utf-8");
  const serialized = JSON.stringify(body);
  res.end(serialized);
}
|
|
2249
|
+
/**
 * Sends a JSON error response of the shape `{ error: { code, message } }`.
 *
 * @param {Object} res - Server response.
 * @param {number} status2 - HTTP status code.
 * @param {string} code - Machine-readable error code.
 * @param {string} message - Human-readable description.
 * @returns {void}
 */
function writeJsonError(res, status2, code, message) {
  const error = { code, message };
  writeJson(res, status2, { error });
}
|
|
2252
|
+
|
|
2253
|
+
// src/viewer/snapshot.ts
|
|
2254
|
+
import { readdir as readdir4, readFile as readFile16, realpath as realpath3 } from "fs/promises";
|
|
2255
|
+
import path21 from "path";
|
|
2256
|
+
|
|
2257
|
+
// src/compiler/candidates.ts
|
|
2258
|
+
import { readdir as readdir3, rename as rename2, unlink, writeFile as writeFile3, mkdir as mkdir4 } from "fs/promises";
|
|
2259
|
+
import { existsSync } from "fs";
|
|
2260
|
+
import path19 from "path";
|
|
2261
|
+
import { randomBytes } from "crypto";
|
|
2262
|
+
// Number of random bytes behind the hex suffix of a candidate id.
var ID_SUFFIX_BYTES = 4;
|
|
2263
|
+
// File extension of stored candidate files.
var CANDIDATE_EXT = ".json";
|
|
2264
|
+
/**
 * Builds a candidate id: the slug followed by a random hex suffix of
 * ID_SUFFIX_BYTES bytes.
 *
 * @param {string} slug - Slug of the candidate page.
 * @returns {string} Id of the form `<slug>-<hex>`.
 */
function buildCandidateId(slug) {
  const randomHex = randomBytes(ID_SUFFIX_BYTES).toString("hex");
  const id = `${slug}-${randomHex}`;
  return id;
}
|
|
2268
|
+
/**
 * Path of a pending candidate file.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {string} `<root>/<CANDIDATES_DIR>/<id><CANDIDATE_EXT>`.
 */
function candidatePath(root, id) {
  const fileName = `${id}${CANDIDATE_EXT}`;
  return path19.join(root, CANDIDATES_DIR, fileName);
}
|
|
2271
|
+
/**
 * Path of an archived candidate file.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {string} `<root>/<CANDIDATES_ARCHIVE_DIR>/<id><CANDIDATE_EXT>`.
 */
function archivePath(root, id) {
  const fileName = `${id}${CANDIDATE_EXT}`;
  return path19.join(root, CANDIDATES_ARCHIVE_DIR, fileName);
}
|
|
2274
|
+
/**
 * Persists a review candidate built from a draft.
 *
 * The candidate gets a fresh id and a `generatedAt` timestamp. The optional
 * `sourceStates`, `schemaViolations` and `provenanceViolations` fields are
 * copied only when the draft carries a truthy value for them.
 *
 * @param {string} root - Project root directory.
 * @param {Object} draft - Draft with title, slug, summary, sources, body
 *   and the optional fields above.
 * @returns {Promise<Object>} The candidate that was written.
 */
async function writeCandidate(root, draft) {
  const candidate = {
    id: buildCandidateId(draft.slug),
    title: draft.title,
    slug: draft.slug,
    summary: draft.summary,
    sources: draft.sources,
    body: draft.body,
    generatedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
  const optionalFields = ["sourceStates", "schemaViolations", "provenanceViolations"];
  for (const field of optionalFields) {
    const value = draft[field];
    if (value) {
      candidate[field] = value;
    }
  }
  const serialized = JSON.stringify(candidate, null, 2);
  await atomicWrite(candidatePath(root, candidate.id), serialized);
  return candidate;
}
|
|
2290
|
+
/**
 * Reports an error line, marks the process as failed and returns null so
 * callers can `return failWithError(...)`.
 *
 * @param {string} message - Error text to report.
 * @returns {null} Always null.
 */
function failWithError(message) {
  const formatted = error(message);
  status("!", formatted);
  process.exitCode = 1;
  return null;
}
|
|
2295
|
+
/**
 * Loads a candidate, reporting a failure when it cannot be read.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<Object|null>} The candidate, or null after reporting
 *   "Candidate not found".
 */
async function loadCandidateOrFail(root, id) {
  const candidate = await readCandidate(root, id);
  return candidate ? candidate : failWithError(`Candidate not found: ${id}`);
}
|
|
2300
|
+
/**
 * Loads a candidate, reporting that it was removed by another process when
 * it cannot be read.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<Object|null>} The candidate, or null after reporting
 *   the failure.
 */
async function loadCandidateUnderLockOrFail(root, id) {
  const candidate = await readCandidate(root, id);
  if (candidate) return candidate;
  return failWithError(`Candidate ${id} was removed by another process during review.`);
}
|
|
742
|
-
async function
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
if (
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
}
|
|
752
|
-
return "file";
|
|
2307
|
+
async function readCandidate(root, id) {
|
|
2308
|
+
const raw = await safeReadFile(candidatePath(root, id));
|
|
2309
|
+
if (!raw) return null;
|
|
2310
|
+
try {
|
|
2311
|
+
const parsed = JSON.parse(raw);
|
|
2312
|
+
if (!isValidCandidate(parsed)) return null;
|
|
2313
|
+
return parsed;
|
|
2314
|
+
} catch {
|
|
2315
|
+
return null;
|
|
753
2316
|
}
|
|
754
|
-
if (isYoutubeUrl(source2)) return "transcript";
|
|
755
|
-
return "web";
|
|
756
2317
|
}
|
|
757
|
-
function
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
762
|
-
};
|
|
763
|
-
if (sourceType !== void 0) {
|
|
764
|
-
meta.sourceType = sourceType;
|
|
765
|
-
}
|
|
766
|
-
if (result.truncated) {
|
|
767
|
-
meta.truncated = true;
|
|
768
|
-
meta.originalChars = result.originalChars;
|
|
769
|
-
}
|
|
770
|
-
const frontmatter = buildFrontmatter(meta);
|
|
771
|
-
return `${frontmatter}
|
|
772
|
-
|
|
773
|
-
${result.content}
|
|
774
|
-
`;
|
|
2318
|
+
/**
 * Structural check for a parsed candidate file.
 *
 * @param {*} value - Parsed JSON value.
 * @returns {boolean} True when `value` is an object whose `id`, `title`,
 *   `slug` and `body` are strings and whose `sources` is an array.
 */
function isValidCandidate(value) {
  if (!value || typeof value !== "object") return false;
  const requiredStrings = ["id", "title", "slug", "body"];
  const stringsOk = requiredStrings.every((field) => typeof value[field] === "string");
  return stringsOk && Array.isArray(value.sources);
}
|
|
776
|
-
async function
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
case "file":
|
|
787
|
-
return ingestFile(source2);
|
|
2323
|
+
/**
 * Lists all readable pending candidates, oldest first.
 *
 * Only regular files ending in CANDIDATE_EXT are considered; files that
 * readCandidate cannot load are skipped.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<Object[]>} Candidates sorted ascending by `generatedAt`;
 *   an empty array when the candidates directory does not exist.
 */
async function listCandidates(root) {
  const dir = path19.join(root, CANDIDATES_DIR);
  if (!existsSync(dir)) return [];
  const entries = await readdir3(dir, { withFileTypes: true });
  const candidates = [];
  for (const entry of entries) {
    if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
    const id = entry.name.slice(0, -CANDIDATE_EXT.length);
    const candidate = await readCandidate(root, id);
    if (candidate) candidates.push(candidate);
  }
  // Candidate validation does not require `generatedAt`, so a file without a
  // string timestamp must not crash the sort; such candidates sort first.
  const timestampOf = (candidate) =>
    typeof candidate.generatedAt === "string" ? candidate.generatedAt : "";
  candidates.sort((a, b) => timestampOf(a).localeCompare(timestampOf(b)));
  return candidates;
}
|
|
790
|
-
async function
|
|
791
|
-
const
|
|
792
|
-
|
|
793
|
-
await mkdir2(SOURCES_DIR, { recursive: true });
|
|
794
|
-
await writeFile2(destPath, document, "utf-8");
|
|
795
|
-
return destPath;
|
|
2337
|
+
/**
 * Counts the pending candidates that listCandidates can read.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<number>} Number of readable candidates.
 */
async function countCandidates(root) {
  const { length } = await listCandidates(root);
  return length;
}
|
|
797
|
-
async function
|
|
798
|
-
const
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
enforceMinContent(result.content);
|
|
803
|
-
const document = buildDocument(title, source2, result, sourceType);
|
|
804
|
-
const savedPath = await saveSource(title, document);
|
|
805
|
-
return {
|
|
806
|
-
filename: path7.basename(savedPath),
|
|
807
|
-
charCount: result.content.length,
|
|
808
|
-
truncated: result.truncated,
|
|
809
|
-
source: source2,
|
|
810
|
-
sourceType
|
|
811
|
-
};
|
|
2341
|
+
/**
 * Deletes a pending candidate file.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<boolean>} False when the file does not exist, true
 *   after it was removed.
 */
async function deleteCandidate(root, id) {
  const target = candidatePath(root, id);
  if (existsSync(target)) {
    await unlink(target);
    return true;
  }
  return false;
}
|
|
813
|
-
async function
|
|
814
|
-
const
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
2347
|
+
/**
 * Moves a pending candidate file into the archive directory.
 *
 * A rename is tried first. When it fails, the file is copied by reading and
 * rewriting it, and the source is removed afterwards.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<boolean>} False when the candidate file does not exist,
 *   true after it was archived.
 * @throws {Error} When the rename failed and the source could not be read
 *   for the copy fallback; the rename error is attached as `cause`.
 */
async function archiveCandidate(root, id) {
  const sourcePath = candidatePath(root, id);
  if (!existsSync(sourcePath)) return false;
  const target = archivePath(root, id);
  await mkdir4(path19.dirname(target), { recursive: true });
  try {
    await rename2(sourcePath, target);
  } catch (renameError) {
    const raw = await safeReadFile(sourcePath);
    // NOTE(review): safeReadFile appears to return a nullish value when the
    // read fails — confirm. Never write that into the archive and then delete
    // the source; surface the original rename failure instead.
    if (raw == null) {
      throw new Error(`Could not archive candidate ${id}: rename failed and the source is unreadable.`, {
        cause: renameError
      });
    }
    await writeFile3(target, raw, "utf-8");
    await unlink(sourcePath);
  }
  return true;
}
|
|
822
2361
|
|
|
823
|
-
// src/commands/compile.ts
|
|
824
|
-
import { existsSync as existsSync7 } from "fs";
|
|
825
|
-
|
|
826
|
-
// src/compiler/index.ts
|
|
827
|
-
import { readFile as readFile14 } from "fs/promises";
|
|
828
|
-
import path21 from "path";
|
|
829
|
-
|
|
830
2362
|
// src/utils/state.ts
|
|
831
|
-
import { readFile as
|
|
832
|
-
import { existsSync } from "fs";
|
|
833
|
-
import
|
|
2363
|
+
import { readFile as readFile15, writeFile as writeFile4, rename as rename3, mkdir as mkdir5, copyFile } from "fs/promises";
|
|
2364
|
+
import { existsSync as existsSync2 } from "fs";
|
|
2365
|
+
import path20 from "path";
|
|
834
2366
|
/**
 * Creates the state used when no state file exists yet.
 *
 * @returns {{version: number, indexHash: string, sources: Object}} A fresh
 *   state object with version 1, an empty index hash and no sources.
 */
function emptyState() {
  const state = {
    version: 1,
    indexHash: "",
    sources: {}
  };
  return state;
}
|
|
837
2369
|
async function readState(root) {
|
|
838
|
-
const filePath =
|
|
839
|
-
if (!
|
|
2370
|
+
const filePath = path20.join(root, STATE_FILE);
|
|
2371
|
+
if (!existsSync2(filePath)) {
|
|
840
2372
|
return emptyState();
|
|
841
2373
|
}
|
|
842
2374
|
try {
|
|
843
|
-
const raw = await
|
|
2375
|
+
const raw = await readFile15(filePath, "utf-8");
|
|
844
2376
|
return JSON.parse(raw);
|
|
845
2377
|
} catch {
|
|
846
2378
|
const bakPath = filePath + ".bak";
|
|
@@ -850,12 +2382,12 @@ async function readState(root) {
|
|
|
850
2382
|
}
|
|
851
2383
|
}
|
|
852
2384
|
async function writeState(root, state) {
|
|
853
|
-
const dir =
|
|
854
|
-
await
|
|
855
|
-
const filePath =
|
|
2385
|
+
const dir = path20.join(root, LLMWIKI_DIR);
|
|
2386
|
+
await mkdir5(dir, { recursive: true });
|
|
2387
|
+
const filePath = path20.join(root, STATE_FILE);
|
|
856
2388
|
const tmpPath = filePath + ".tmp";
|
|
857
|
-
await
|
|
858
|
-
await
|
|
2389
|
+
await writeFile4(tmpPath, JSON.stringify(state, null, 2), "utf-8");
|
|
2390
|
+
await rename3(tmpPath, filePath);
|
|
859
2391
|
}
|
|
860
2392
|
async function updateSourceState(root, sourceFile, entry) {
|
|
861
2393
|
const state = await readState(root);
|
|
@@ -868,20 +2400,226 @@ async function removeSourceState(root, sourceFile) {
|
|
|
868
2400
|
await writeState(root, state);
|
|
869
2401
|
}
|
|
870
2402
|
|
|
2403
|
+
// src/viewer/snapshot.ts
|
|
2404
|
+
// Maximum number of rows in the snapshot's recentPages list.
var RECENT_PAGES_LIMIT = 8;
|
|
2405
|
+
// href reported for the wiki index in the snapshot.
var INDEX_HREF = "/#/index";
|
|
2406
|
+
/**
 * Builds the snapshot the viewer server works from.
 *
 * Pages, compile state, pending review count, source file names and the
 * index file are loaded concurrently; pages are then annotated with
 * citation warnings.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<Object>} Snapshot with root, generatedAt, project,
 *   counts, index, recentPages, pages and sourceFilenames.
 */
async function buildViewerSnapshot(root) {
  const loaded = await Promise.all([
    collectViewerPages(root),
    readState(root),
    countCandidates(root),
    listSourceFiles(root),
    readIndexFile(root)
  ]);
  const [pages, state, pendingReviews, sourceFilenames, index] = loaded;
  const project = buildProject(root);
  const countInDirectory = (directory) =>
    pages.filter((page) => page.pageDirectory === directory).length;
  const counts = {
    concepts: countInDirectory("concepts"),
    queries: countInDirectory("queries"),
    sourceFiles: sourceFilenames.length,
    pendingReviews,
    compiledSources: Object.keys(state.sources).length
  };
  const indexLinks = resolveBareSlugList(extractWikilinkSlugs(index.body), pages);
  const fullIndex = {
    available: index.available,
    href: INDEX_HREF,
    body: index.body,
    outgoingLinks: indexLinks
  };
  // A Set keeps the per-citation source lookup cheap.
  const knownSources = new Set(sourceFilenames);
  const annotatedPages = pages.map((page) => annotateCitationWarnings(page, knownSources));
  const recentPages = buildRecentPages(annotatedPages);
  return {
    root,
    generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
    project,
    counts,
    index: fullIndex,
    recentPages,
    pages: annotatedPages,
    sourceFilenames
  };
}
|
|
2441
|
+
/**
 * Adds citation warnings to a page.
 *
 * Every `^[...]` marker in the page body is checked. When at least one
 * warning results, a copy of the page with the warnings appended is
 * returned; otherwise the page itself.
 *
 * @param {{body: string, warnings: Array}} page - Snapshot page.
 * @param {Set<string>} sourceFiles - Names of the known source files.
 * @returns {Object} The original page or an annotated copy.
 */
function annotateCitationWarnings(page, sourceFiles) {
  const found = [];
  for (const marker of page.body.matchAll(/\^\[([^\]\n]+)\]/g)) {
    appendCitationWarningsForMarker(marker[1], sourceFiles, found);
  }
  return found.length === 0 ? page : { ...page, warnings: [...page.warnings, ...found] };
}
|
|
2451
|
+
/**
 * Checks the comma-separated entries of one citation marker and appends a
 * warning for every problem found.
 *
 * Blank entries are ignored. A malformed entry yields "malformed_citation".
 * Otherwise the part before the first `:` or `#` is taken as the file name,
 * and an unknown file yields "unresolved_citation".
 *
 * @param {string} raw - Marker content between `^[` and `]`.
 * @param {Set<string>} sourceFiles - Names of the known source files.
 * @param {Array<{code: string, message: string}>} into - Array that
 *   receives the warnings (mutated).
 * @returns {void}
 */
function appendCitationWarningsForMarker(raw, sourceFiles, into) {
  const entries = raw
    .split(",")
    .map((part) => part.trim())
    .filter((part) => part.length > 0);
  for (const entry of entries) {
    if (isMalformedCitationEntry(entry)) {
      into.push({ code: "malformed_citation", message: `Malformed citation entry: ${entry}` });
      continue;
    }
    const [file] = entry.split(/[:#]/);
    const isKnown = sourceFiles.has(file);
    if (file.length > 0 && !isKnown) {
      into.push({ code: "unresolved_citation", message: `Source not found: ${file}` });
    }
  }
}
|
|
2471
|
+
/**
 * Derives the project descriptor from the root directory name.
 *
 * @param {string} root - Project root directory.
 * @returns {{title: string, rootName: string}} Both fields hold the base
 *   name of `root`.
 */
function buildProject(root) {
  const name = path21.basename(root);
  return { title: name, rootName: name };
}
|
|
2475
|
+
/**
 * Lists the names of the regular files in the project's sources directory.
 *
 * The root and the sources directory are both resolved with realpath. The
 * directory is read only when its resolved path equals the expected path
 * under the canonical root, so a sources directory that resolves elsewhere
 * (for example through a symlink) yields no files.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<string[]>} File names; an empty array on any failure.
 */
async function listSourceFiles(root) {
  const resolveOrNull = async (target) => {
    try {
      return await realpath3(target);
    } catch {
      return null;
    }
  };
  const canonicalRoot = await resolveOrNull(root);
  if (canonicalRoot === null) return [];
  const expectedDir = path21.join(canonicalRoot, SOURCES_DIR);
  const realDir = await resolveOrNull(expectedDir);
  if (realDir === null || realDir !== expectedDir) return [];
  try {
    const entries = await readdir4(realDir, { withFileTypes: true });
    const names = [];
    for (const entry of entries) {
      if (entry.isFile()) names.push(entry.name);
    }
    return names;
  } catch {
    return [];
  }
}
|
|
2497
|
+
/**
 * Reads `wiki/index.md` of the project.
 *
 * The root and the index path are both resolved with realpath. The file is
 * read only when its resolved path equals the expected path under the
 * canonical root.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<{available: boolean, body: string}>} The index body, or
 *   `{ available: false, body: "" }` on any failure.
 */
async function readIndexFile(root) {
  const unavailable = () => ({ available: false, body: "" });
  const resolveOrNull = async (target) => {
    try {
      return await realpath3(target);
    } catch {
      return null;
    }
  };
  const canonicalRoot = await resolveOrNull(root);
  if (canonicalRoot === null) return unavailable();
  const expectedIndex = path21.join(canonicalRoot, "wiki", "index.md");
  const resolved = await resolveOrNull(expectedIndex);
  if (resolved === null || resolved !== expectedIndex) return unavailable();
  try {
    return { available: true, body: await readFile16(resolved, "utf-8") };
  } catch {
    return unavailable();
  }
}
|
|
2521
|
+
/**
 * Builds the list of most recently updated pages.
 *
 * Rows are sorted descending by the frontmatter `updatedAt` string (pages
 * without a string value use "") and cut to RECENT_PAGES_LIMIT.
 *
 * @param {Array} pages - Snapshot pages.
 * @returns {Array<{id: *, pageDirectory: *, slug: *, title: *, updatedAt: string}>}
 */
function buildRecentPages(pages) {
  const toRow = ({ id, pageDirectory, slug, title, frontmatter }) => ({
    id,
    pageDirectory,
    slug,
    title,
    updatedAt: typeof frontmatter.updatedAt === "string" ? frontmatter.updatedAt : ""
  });
  const newestFirst = (a, b) => b.updatedAt.localeCompare(a.updatedAt);
  return pages.map(toRow).sort(newestFirst).slice(0, RECENT_PAGES_LIMIT);
}
|
|
2532
|
+
|
|
2533
|
+
// src/commands/view.ts
|
|
2534
|
+
// Host the viewer binds to when no --host is supplied.
var LOOPBACK_HOST = "127.0.0.1";
|
|
2535
|
+
// Wildcard bind addresses that resolveBindConfig refuses.
var WILDCARD_HOSTS = /* @__PURE__ */ new Set([
|
|
2536
|
+
"0.0.0.0",
|
|
2537
|
+
"::",
|
|
2538
|
+
"0:0:0:0:0:0:0:0",
|
|
2539
|
+
"0000:0000:0000:0000:0000:0000:0000:0000",
|
|
2540
|
+
"*"
|
|
2541
|
+
]);
|
|
2542
|
+
/**
 * Implements the `view` command: builds a snapshot of the current working
 * directory, starts the viewer server, prints its URL, optionally opens a
 * browser and installs the shutdown handlers.
 *
 * @param {{host?: string, allowLan?: boolean, port?: (string|number), open?: boolean}} options
 *   Parsed command options.
 * @returns {Promise<void>}
 * @throws {Error} When the bind options are invalid or the server cannot
 *   start.
 */
async function viewCommand(options) {
  const bind = resolveBindConfig(options);
  const snapshot = await buildViewerSnapshot(process.cwd());
  const handle = await startViewerServer(snapshot, { host: bind.host, port: bind.port });
  const url = buildReadyUrl(handle.host, handle.port);
  process.stdout.write(`Viewer ready at ${url}\n`);
  if (options.open) {
    openInBrowser(url);
  }
  registerShutdown(handle.close);
}
|
|
2553
|
+
/**
 * Opens a URL with the platform's opener (`open` on macOS, `cmd /c start`
 * on Windows, `xdg-open` elsewhere) as a detached child process. Spawn
 * errors are ignored.
 *
 * @param {string} url - URL to open.
 * @returns {void}
 */
function openInBrowser(url) {
  let command;
  let args;
  switch (process.platform) {
    case "darwin":
      command = "open";
      args = [url];
      break;
    case "win32":
      command = "cmd";
      // The empty string fills the window-title argument of `start`.
      args = ["/c", "start", "", url];
      break;
    default:
      command = "xdg-open";
      args = [url];
      break;
  }
  const child = spawn(command, args, { stdio: "ignore", detached: true });
  child.on("error", () => void 0);
  // Do not keep the CLI alive for the opener process.
  child.unref();
}
|
|
2560
|
+
/**
 * Resolves the host and port to bind from the command options.
 *
 * `--host` and `--allow-lan` must be given together or not at all; without
 * them the viewer binds to LOOPBACK_HOST. Wildcard hosts are refused.
 *
 * @param {{host?: string, allowLan?: boolean, port?: (string|number)}} options
 *   Parsed command options.
 * @returns {{host: string, port: number}} Bind configuration.
 * @throws {Error} On a lone `--host`/`--allow-lan`, a wildcard host or an
 *   invalid port.
 */
function resolveBindConfig(options) {
  const hasHost = typeof options.host === "string" && options.host.length > 0;
  const lanAllowed = options.allowLan === true;
  if (hasHost !== lanAllowed) {
    throw new Error(
      "Privacy gate: --host and --allow-lan must be supplied together. Use both to bind beyond loopback, or neither to keep the viewer on 127.0.0.1."
    );
  }
  let host = LOOPBACK_HOST;
  if (hasHost) {
    host = options.host;
  }
  if (WILDCARD_HOSTS.has(host)) {
    throw new Error(
      `--host ${host} is not supported: wildcard binds defeat the viewer's DNS-rebind protection. Use a specific interface IP (e.g. 192.168.1.10) instead.`
    );
  }
  return { host, port: parsePort(options.port) };
}
|
|
2577
|
+
/**
 * Builds the URL printed once the viewer is ready.
 *
 * @param {string} host - Bound host; a colon marks it as an IPv6 literal.
 * @param {number} port - Bound port.
 * @returns {string} `http://host:port`, with IPv6 hosts in brackets.
 */
function buildReadyUrl(host, port) {
  const authority = host.includes(":") ? `[${host}]` : host;
  return `http://${authority}:${port}`;
}
|
|
2581
|
+
/**
 * Validate a `--port` option value.
 *
 * `undefined`/`null` mean "not supplied" and yield 0. Numbers are accepted
 * as-is; strings must consist solely of decimal digits (surrounding
 * whitespace is tolerated). Plain `Number(raw)` coercion is avoided on
 * purpose: it maps "" and "   " to 0 and accepts forms such as "0x50",
 * "1e3", `true` or `[]`, which would silently turn a malformed flag into a
 * valid-looking port.
 *
 * @param {number|string|undefined|null} raw - Value taken from the CLI options.
 * @returns {number} Integer in the range 0-65535.
 * @throws {Error} When the value is not a decimal integer within range.
 */
function parsePort(raw) {
  if (raw === void 0 || raw === null) return 0;

  let value = Number.NaN;
  if (typeof raw === "number") {
    value = raw;
  } else if (typeof raw === "string") {
    const text = raw.trim();
    if (/^\d+$/.test(text)) value = Number(text);
  }

  if (!Number.isInteger(value) || value < 0 || value > 65535) {
    throw new Error(`Invalid --port value: ${raw}`);
  }
  return value;
}
|
|
2589
|
+
/**
 * Install one-shot SIGINT and SIGTERM handlers that close the server and
 * then exit the process.
 *
 * Exit code is 0 when `close()` settles successfully and 1 when it throws
 * or rejects.
 *
 * @param {() => (Promise<void>|void)} close - Shutdown routine to await.
 * @returns {void}
 */
function registerShutdown(close) {
  async function closeThenExit() {
    try {
      await close();
      process.exit(0);
    } catch {
      process.exit(1);
    }
  }

  for (const signal of ["SIGINT", "SIGTERM"]) {
    // `once`: a repeated signal is not routed through this handler again.
    process.once(signal, () => {
      void closeThenExit();
    });
  }
}
|
|
2601
|
+
|
|
2602
|
+
// src/commands/compile.ts
|
|
2603
|
+
import { existsSync as existsSync7 } from "fs";
|
|
2604
|
+
|
|
2605
|
+
// src/compiler/index.ts
|
|
2606
|
+
import { readFile as readFile23 } from "fs/promises";
|
|
2607
|
+
import path33 from "path";
|
|
2608
|
+
|
|
871
2609
|
// src/compiler/source-state.ts
|
|
872
|
-
import
|
|
2610
|
+
import path23 from "path";
|
|
873
2611
|
|
|
874
2612
|
// src/compiler/hasher.ts
|
|
875
|
-
import { createHash } from "crypto";
|
|
876
|
-
import { readFile as
|
|
877
|
-
import
|
|
2613
|
+
import { createHash as createHash2 } from "crypto";
|
|
2614
|
+
import { readFile as readFile17, readdir as readdir5 } from "fs/promises";
|
|
2615
|
+
import path22 from "path";
|
|
878
2616
|
async function hashFile(filePath) {
|
|
879
|
-
const content = await
|
|
880
|
-
return
|
|
2617
|
+
const content = await readFile17(filePath, "utf-8");
|
|
2618
|
+
return createHash2("sha256").update(content).digest("hex");
|
|
881
2619
|
}
|
|
882
2620
|
async function detectChanges(root, prevState) {
|
|
883
|
-
const sourcesPath =
|
|
884
|
-
const currentFiles = await
|
|
2621
|
+
const sourcesPath = path22.join(root, SOURCES_DIR);
|
|
2622
|
+
const currentFiles = await listSourceFiles2(sourcesPath);
|
|
885
2623
|
const changes = [];
|
|
886
2624
|
for (const file of currentFiles) {
|
|
887
2625
|
const status2 = await classifyFile(root, file, prevState);
|
|
@@ -891,16 +2629,16 @@ async function detectChanges(root, prevState) {
|
|
|
891
2629
|
changes.push(...deletedChanges);
|
|
892
2630
|
return changes;
|
|
893
2631
|
}
|
|
894
|
-
async function
|
|
2632
|
+
async function listSourceFiles2(sourcesPath) {
|
|
895
2633
|
try {
|
|
896
|
-
const entries = await
|
|
2634
|
+
const entries = await readdir5(sourcesPath);
|
|
897
2635
|
return entries.filter((f) => f.endsWith(".md"));
|
|
898
2636
|
} catch {
|
|
899
2637
|
return [];
|
|
900
2638
|
}
|
|
901
2639
|
}
|
|
902
2640
|
async function classifyFile(root, file, prevState) {
|
|
903
|
-
const filePath =
|
|
2641
|
+
const filePath = path22.join(root, SOURCES_DIR, file);
|
|
904
2642
|
const hash = await hashFile(filePath);
|
|
905
2643
|
const prev = prevState.sources[file];
|
|
906
2644
|
if (!prev) return "new";
|
|
@@ -923,7 +2661,7 @@ async function buildExtractionSourceStates(root, extractions) {
|
|
|
923
2661
|
return snapshot;
|
|
924
2662
|
}
|
|
925
2663
|
async function buildEntry(root, result, compiledAt) {
|
|
926
|
-
const filePath =
|
|
2664
|
+
const filePath = path23.join(root, SOURCES_DIR, result.sourceFile);
|
|
927
2665
|
const hash = await hashFile(filePath);
|
|
928
2666
|
return {
|
|
929
2667
|
hash,
|
|
@@ -1012,7 +2750,8 @@ var OpenAIProvider = class {
|
|
|
1012
2750
|
model: this.model,
|
|
1013
2751
|
max_tokens: maxTokens,
|
|
1014
2752
|
messages: [{ role: "system", content: system }, ...messages],
|
|
1015
|
-
tools: openaiTools
|
|
2753
|
+
tools: openaiTools,
|
|
2754
|
+
tool_choice: "required"
|
|
1016
2755
|
});
|
|
1017
2756
|
const toolCalls = response.choices[0]?.message?.tool_calls;
|
|
1018
2757
|
if (toolCalls && toolCalls.length > 0) {
|
|
@@ -1069,8 +2808,24 @@ var MiniMaxProvider = class extends OpenAIProvider {
|
|
|
1069
2808
|
}
|
|
1070
2809
|
};
|
|
1071
2810
|
|
|
2811
|
+
// src/providers/copilot.ts
|
|
2812
|
+
/**
 * Provider that reuses the OpenAIProvider implementation, constructed with
 * COPILOT_BASE_URL as its base URL.
 */
var CopilotProvider = class extends OpenAIProvider {
  /**
   * @param {string} model - Model identifier forwarded to the base class.
   * @param {string} apiKey - Token forwarded to the base class.
   */
  constructor(model, apiKey) {
    const clientOptions = { baseURL: COPILOT_BASE_URL, apiKey };
    super(model, clientOptions);
  }
  /**
   * GitHub Copilot has no native embeddings API, so this always rejects
   * with a message pointing the user at the OpenAI provider instead.
   *
   * @param {string} _text - Ignored.
   * @throws {Error} Always.
   */
  async embed(_text) {
    const message = "GitHub Copilot does not support embeddings.\n For semantic search (llmwiki query), switch to the OpenAI provider:\n export LLMWIKI_PROVIDER=openai\n export OPENAI_API_KEY=sk-...";
    throw new Error(message);
  }
};
|
|
2826
|
+
|
|
1072
2827
|
// src/utils/provider.ts
|
|
1073
|
-
var SUPPORTED_PROVIDERS = /* @__PURE__ */ new Set(["anthropic", "openai", "ollama", "minimax"]);
|
|
2828
|
+
var SUPPORTED_PROVIDERS = /* @__PURE__ */ new Set(["anthropic", "openai", "ollama", "minimax", "copilot"]);
|
|
1074
2829
|
function getProvider() {
|
|
1075
2830
|
const providerName = getProviderName();
|
|
1076
2831
|
switch (providerName) {
|
|
@@ -1090,6 +2845,8 @@ function getProvider() {
|
|
|
1090
2845
|
});
|
|
1091
2846
|
case "minimax":
|
|
1092
2847
|
return getMiniMaxProvider();
|
|
2848
|
+
case "copilot":
|
|
2849
|
+
return getCopilotProvider();
|
|
1093
2850
|
default:
|
|
1094
2851
|
throw new Error(`Unhandled provider: ${providerName}`);
|
|
1095
2852
|
}
|
|
@@ -1110,6 +2867,15 @@ function getMiniMaxProvider() {
|
|
|
1110
2867
|
}
|
|
1111
2868
|
return new MiniMaxProvider(getModelForProvider("minimax"), apiKey);
|
|
1112
2869
|
}
|
|
2870
|
+
/**
 * Build a CopilotProvider from the GITHUB_TOKEN environment variable.
 *
 * @returns {CopilotProvider} Provider using the model configured for "copilot".
 * @throws {Error} When GITHUB_TOKEN is unset or empty.
 */
function getCopilotProvider() {
  const { GITHUB_TOKEN: apiKey } = process.env;
  if (apiKey) {
    return new CopilotProvider(getModelForProvider("copilot"), apiKey);
  }
  throw new Error(
    "GitHub Copilot provider requires GITHUB_TOKEN environment variable.\n Run: gh auth refresh --scopes copilot\n Then set it with: export GITHUB_TOKEN=$(gh auth token)\n The token must belong to a GitHub account with an active Copilot subscription."
  );
}
|
|
1113
2879
|
function getAnthropicProvider() {
|
|
1114
2880
|
const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
|
|
1115
2881
|
const baseURL = resolveAnthropicBaseURLFromEnv();
|
|
@@ -1161,8 +2927,8 @@ async function callClaude(options) {
|
|
|
1161
2927
|
}
|
|
1162
2928
|
|
|
1163
2929
|
// src/utils/lock.ts
|
|
1164
|
-
import { open, readFile as
|
|
1165
|
-
import
|
|
2930
|
+
import { open, readFile as readFile18, unlink as unlink2, mkdir as mkdir6 } from "fs/promises";
|
|
2931
|
+
import path24 from "path";
|
|
1166
2932
|
var RECLAIM_SUFFIX = ".reclaim";
|
|
1167
2933
|
var MAX_ACQUIRE_ATTEMPTS = 2;
|
|
1168
2934
|
function isProcessAlive(pid) {
|
|
@@ -1174,8 +2940,8 @@ function isProcessAlive(pid) {
|
|
|
1174
2940
|
}
|
|
1175
2941
|
}
|
|
1176
2942
|
async function acquireLock(root) {
|
|
1177
|
-
const lockPath =
|
|
1178
|
-
await
|
|
2943
|
+
const lockPath = path24.join(root, LOCK_FILE);
|
|
2944
|
+
await mkdir6(path24.join(root, LLMWIKI_DIR), { recursive: true });
|
|
1179
2945
|
for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
|
|
1180
2946
|
const created = await tryCreateLock(lockPath);
|
|
1181
2947
|
if (created) return true;
|
|
@@ -1199,7 +2965,7 @@ async function reclaimStaleLock(root, lockPath) {
|
|
|
1199
2965
|
return false;
|
|
1200
2966
|
}
|
|
1201
2967
|
try {
|
|
1202
|
-
await
|
|
2968
|
+
await unlink2(lockPath);
|
|
1203
2969
|
} catch {
|
|
1204
2970
|
}
|
|
1205
2971
|
const acquired = await tryCreateLock(lockPath);
|
|
@@ -1209,7 +2975,7 @@ async function reclaimStaleLock(root, lockPath) {
|
|
|
1209
2975
|
return acquired;
|
|
1210
2976
|
} finally {
|
|
1211
2977
|
try {
|
|
1212
|
-
await
|
|
2978
|
+
await unlink2(reclaimPath);
|
|
1213
2979
|
} catch {
|
|
1214
2980
|
}
|
|
1215
2981
|
}
|
|
@@ -1218,7 +2984,7 @@ async function acquireReclaimLock(reclaimPath) {
|
|
|
1218
2984
|
if (await tryCreateLock(reclaimPath)) return true;
|
|
1219
2985
|
if (!await isLockStale(reclaimPath)) return false;
|
|
1220
2986
|
try {
|
|
1221
|
-
await
|
|
2987
|
+
await unlink2(reclaimPath);
|
|
1222
2988
|
} catch {
|
|
1223
2989
|
}
|
|
1224
2990
|
return false;
|
|
@@ -1238,7 +3004,7 @@ async function tryCreateLock(lockPath) {
|
|
|
1238
3004
|
}
|
|
1239
3005
|
async function isLockStale(lockPath) {
|
|
1240
3006
|
try {
|
|
1241
|
-
const content = await
|
|
3007
|
+
const content = await readFile18(lockPath, "utf-8");
|
|
1242
3008
|
const pid = parseInt(content.trim(), 10);
|
|
1243
3009
|
if (isNaN(pid)) return true;
|
|
1244
3010
|
return !isProcessAlive(pid);
|
|
@@ -1247,14 +3013,32 @@ async function isLockStale(lockPath) {
|
|
|
1247
3013
|
}
|
|
1248
3014
|
}
|
|
1249
3015
|
async function releaseLock(root) {
|
|
1250
|
-
const lockPath =
|
|
3016
|
+
const lockPath = path24.join(root, LOCK_FILE);
|
|
1251
3017
|
try {
|
|
1252
|
-
await
|
|
3018
|
+
await unlink2(lockPath);
|
|
1253
3019
|
} catch {
|
|
1254
3020
|
}
|
|
1255
3021
|
}
|
|
1256
3022
|
|
|
3023
|
+
// src/utils/output-language.ts
|
|
3024
|
+
var LANG_ENV_VAR = "LLMWIKI_OUTPUT_LANG";
/**
 * Read the requested output language from the environment.
 *
 * @returns {string|null} The trimmed value of LLMWIKI_OUTPUT_LANG, or null
 *   when the variable is unset, empty, or whitespace only.
 */
function getOutputLanguage() {
  const trimmed = process.env[LANG_ENV_VAR]?.trim();
  return trimmed ? trimmed : null;
}
|
|
3031
|
+
/**
 * Produce the prompt sentence that pins the output language.
 *
 * @returns {string} `Write the output in <lang>.`, or "" when
 *   getOutputLanguage() reports no language.
 */
function languageDirective() {
  const lang = getOutputLanguage();
  return lang ? `Write the output in ${lang}.` : "";
}
|
|
3036
|
+
|
|
1257
3037
|
// src/compiler/prompts.ts
|
|
3038
|
+
/**
 * Append the language directive, when there is one, to a list of prompt lines.
 *
 * @param {...string} lines - Prompt lines in order.
 * @returns {string[]} `lines` itself when no directive applies, otherwise a
 *   new array with the directive as the final element.
 */
function withLangLine(...lines) {
  const directive = languageDirective();
  if (!directive) {
    return lines;
  }
  return lines.concat(directive);
}
|
|
1258
3042
|
var PROVENANCE_STATE_VALUES = [
|
|
1259
3043
|
"extracted",
|
|
1260
3044
|
"merged",
|
|
@@ -1309,10 +3093,6 @@ var CONCEPT_EXTRACTION_TOOL = {
|
|
|
1309
3093
|
required: ["slug"]
|
|
1310
3094
|
},
|
|
1311
3095
|
description: "Slugs of other concepts whose evidence contradicts this one."
|
|
1312
|
-
},
|
|
1313
|
-
inferred_paragraphs: {
|
|
1314
|
-
type: "integer",
|
|
1315
|
-
description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
|
|
1316
3096
|
}
|
|
1317
3097
|
},
|
|
1318
3098
|
required: ["concept", "summary", "is_new"]
|
|
@@ -1329,11 +3109,13 @@ Here is the existing wiki index \u2014 avoid duplicating concepts already covere
|
|
|
1329
3109
|
|
|
1330
3110
|
${existingIndex}` : "\n\nNo existing wiki pages yet.";
|
|
1331
3111
|
return [
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
3112
|
+
...withLangLine(
|
|
3113
|
+
"You are a knowledge extraction engine. Analyze the following source document",
|
|
3114
|
+
"and identify 3-8 distinct, meaningful concepts worth documenting as wiki pages.",
|
|
3115
|
+
"Each concept should be a standalone topic that someone might look up.",
|
|
3116
|
+
"Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
|
|
3117
|
+
"Use the extract_concepts tool to return your findings."
|
|
3118
|
+
),
|
|
1337
3119
|
"",
|
|
1338
3120
|
"For every concept, emit provenance metadata so downstream tools can reason",
|
|
1339
3121
|
"about reliability:",
|
|
@@ -1343,8 +3125,6 @@ ${existingIndex}` : "\n\nNo existing wiki pages yet.";
|
|
|
1343
3125
|
" or 'ambiguous' if the source is contradictory or unclear.",
|
|
1344
3126
|
" - contradicted_by: slugs of other concepts (in this batch or the index)",
|
|
1345
3127
|
" whose evidence conflicts with this one.",
|
|
1346
|
-
" - inferred_paragraphs: estimated number of paragraphs in the resulting",
|
|
1347
|
-
" page that will be inferred rather than directly citable.",
|
|
1348
3128
|
indexSection,
|
|
1349
3129
|
"\n\n--- SOURCE DOCUMENT ---\n\n",
|
|
1350
3130
|
sourceContent
|
|
@@ -1362,11 +3142,13 @@ Related wiki pages for cross-referencing:
|
|
|
1362
3142
|
|
|
1363
3143
|
${relatedPages}` : "";
|
|
1364
3144
|
return [
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
3145
|
+
...withLangLine(
|
|
3146
|
+
`You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
|
|
3147
|
+
"Draw facts only from the provided source material.",
|
|
3148
|
+
"Include a ## Sources section at the end listing the source document.",
|
|
3149
|
+
"Suggest [[wikilinks]] to related concepts where appropriate.",
|
|
3150
|
+
"Write in a neutral, informative tone. Be concise but thorough."
|
|
3151
|
+
),
|
|
1370
3152
|
"",
|
|
1371
3153
|
"Source attribution: at the end of each prose paragraph, append a citation",
|
|
1372
3154
|
"marker showing which source file(s) the paragraph drew from.",
|
|
@@ -1383,7 +3165,7 @@ ${relatedPages}` : "";
|
|
|
1383
3165
|
"",
|
|
1384
3166
|
"If a paragraph is your inference rather than a direct extraction, leave it",
|
|
1385
3167
|
"uncited \u2014 downstream lint rules will count uncited paragraphs as 'inferred'",
|
|
1386
|
-
"
|
|
3168
|
+
"so lint can surface excess-inferred-paragraphs warnings on review.",
|
|
1387
3169
|
existingSection,
|
|
1388
3170
|
relatedSection,
|
|
1389
3171
|
"\n\n--- SOURCE MATERIAL ---\n\n",
|
|
@@ -1415,20 +3197,21 @@ function mapRawConcept(c) {
|
|
|
1415
3197
|
tags: Array.isArray(c.tags) ? c.tags : void 0,
|
|
1416
3198
|
confidence: typeof c.confidence === "number" ? c.confidence : void 0,
|
|
1417
3199
|
provenanceState: provenance,
|
|
1418
|
-
contradictedBy: coerceContradictedBy(c.contradicted_by)
|
|
1419
|
-
inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
|
|
3200
|
+
contradictedBy: coerceContradictedBy(c.contradicted_by)
|
|
1420
3201
|
};
|
|
1421
3202
|
}
|
|
1422
3203
|
function buildSeedPagePrompt(seed, rule, relatedPagesContent) {
|
|
1423
3204
|
const minLinks = rule.minWikilinks;
|
|
1424
3205
|
const linkExpectation = minLinks > 0 ? `Include at least ${minLinks} [[wikilinks]] to related pages.` : "Use [[wikilinks]] when referencing other pages.";
|
|
1425
3206
|
return [
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
3207
|
+
...withLangLine(
|
|
3208
|
+
`You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
|
|
3209
|
+
`Page-kind guidance: ${rule.description}`,
|
|
3210
|
+
`Summary line for context: ${seed.summary}`,
|
|
3211
|
+
"Draw facts only from the related wiki pages provided below.",
|
|
3212
|
+
linkExpectation,
|
|
3213
|
+
"Write in a neutral, informative tone. Be concise but thorough."
|
|
3214
|
+
),
|
|
1432
3215
|
"\n\n--- RELATED PAGES ---\n\n",
|
|
1433
3216
|
relatedPagesContent
|
|
1434
3217
|
].join("\n");
|
|
@@ -1489,9 +3272,9 @@ function buildDefaultSchema() {
|
|
|
1489
3272
|
}
|
|
1490
3273
|
|
|
1491
3274
|
// src/schema/loader.ts
|
|
1492
|
-
import { existsSync as
|
|
1493
|
-
import { readFile as
|
|
1494
|
-
import
|
|
3275
|
+
import { existsSync as existsSync3 } from "fs";
|
|
3276
|
+
import { readFile as readFile19 } from "fs/promises";
|
|
3277
|
+
import path25 from "path";
|
|
1495
3278
|
import yaml2 from "js-yaml";
|
|
1496
3279
|
var SCHEMA_CANDIDATE_PATHS = [
|
|
1497
3280
|
".llmwiki/schema.json",
|
|
@@ -1502,8 +3285,8 @@ var SCHEMA_CANDIDATE_PATHS = [
|
|
|
1502
3285
|
];
|
|
1503
3286
|
function findSchemaPath(root) {
|
|
1504
3287
|
for (const candidate of SCHEMA_CANDIDATE_PATHS) {
|
|
1505
|
-
const absolute =
|
|
1506
|
-
if (
|
|
3288
|
+
const absolute = path25.join(root, candidate);
|
|
3289
|
+
if (existsSync3(absolute)) return absolute;
|
|
1507
3290
|
}
|
|
1508
3291
|
return null;
|
|
1509
3292
|
}
|
|
@@ -1555,12 +3338,12 @@ async function loadSchema(root) {
|
|
|
1555
3338
|
const defaults = buildDefaultSchema();
|
|
1556
3339
|
const schemaPath = findSchemaPath(root);
|
|
1557
3340
|
if (!schemaPath) return defaults;
|
|
1558
|
-
const raw = await
|
|
3341
|
+
const raw = await readFile19(schemaPath, "utf-8");
|
|
1559
3342
|
const parsed = parseSchemaFile(schemaPath, raw);
|
|
1560
3343
|
return applyOverrides(defaults, parsed, schemaPath);
|
|
1561
3344
|
}
|
|
1562
3345
|
function defaultSchemaInitPath(root) {
|
|
1563
|
-
return
|
|
3346
|
+
return path25.join(root, SCHEMA_CANDIDATE_PATHS[0]);
|
|
1564
3347
|
}
|
|
1565
3348
|
|
|
1566
3349
|
// src/schema/helpers.ts
|
|
@@ -1732,7 +3515,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
|
|
|
1732
3515
|
}
|
|
1733
3516
|
|
|
1734
3517
|
// src/compiler/orphan.ts
|
|
1735
|
-
import
|
|
3518
|
+
import path26 from "path";
|
|
1736
3519
|
async function markOrphaned(root, sourceFile, state) {
|
|
1737
3520
|
const sourceEntry = state.sources[sourceFile];
|
|
1738
3521
|
if (!sourceEntry) return;
|
|
@@ -1758,7 +3541,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
|
|
|
1758
3541
|
}
|
|
1759
3542
|
}
|
|
1760
3543
|
async function orphanPage(root, slug, reason) {
|
|
1761
|
-
const pagePath =
|
|
3544
|
+
const pagePath = path26.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
1762
3545
|
const content = await safeReadFile(pagePath);
|
|
1763
3546
|
if (!content) return;
|
|
1764
3547
|
const { meta } = parseFrontmatter(content);
|
|
@@ -1769,18 +3552,18 @@ async function orphanPage(root, slug, reason) {
|
|
|
1769
3552
|
}
|
|
1770
3553
|
|
|
1771
3554
|
// src/compiler/resolver.ts
|
|
1772
|
-
import { readdir as
|
|
1773
|
-
import
|
|
1774
|
-
import { existsSync as
|
|
3555
|
+
import { readdir as readdir6, readFile as readFile20 } from "fs/promises";
|
|
3556
|
+
import path27 from "path";
|
|
3557
|
+
import { existsSync as existsSync4 } from "fs";
|
|
1775
3558
|
async function buildTitleIndex(root) {
|
|
1776
|
-
const conceptsDir =
|
|
1777
|
-
if (!
|
|
1778
|
-
const files = await
|
|
3559
|
+
const conceptsDir = path27.join(root, CONCEPTS_DIR);
|
|
3560
|
+
if (!existsSync4(conceptsDir)) return [];
|
|
3561
|
+
const files = await readdir6(conceptsDir);
|
|
1779
3562
|
const pages = [];
|
|
1780
3563
|
for (const file of files) {
|
|
1781
3564
|
if (!file.endsWith(".md")) continue;
|
|
1782
|
-
const filePath =
|
|
1783
|
-
const content = await
|
|
3565
|
+
const filePath = path27.join(conceptsDir, file);
|
|
3566
|
+
const content = await readFile20(filePath, "utf-8");
|
|
1784
3567
|
const { meta } = parseFrontmatter(content);
|
|
1785
3568
|
if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
|
|
1786
3569
|
pages.push({
|
|
@@ -1866,7 +3649,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1866
3649
|
let count = 0;
|
|
1867
3650
|
for (const page of titleIndex) {
|
|
1868
3651
|
if (newSlugs.includes(page.slug)) continue;
|
|
1869
|
-
const content = await
|
|
3652
|
+
const content = await readFile20(page.filePath, "utf-8");
|
|
1870
3653
|
const { body } = parseFrontmatter(content);
|
|
1871
3654
|
const linked = addWikilinks(body, newTitles, page.title);
|
|
1872
3655
|
if (linked !== body) {
|
|
@@ -1878,7 +3661,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1878
3661
|
return count;
|
|
1879
3662
|
}
|
|
1880
3663
|
async function linkPage(page, titleIndex) {
|
|
1881
|
-
const content = await
|
|
3664
|
+
const content = await readFile20(page.filePath, "utf-8");
|
|
1882
3665
|
const { body } = parseFrontmatter(content);
|
|
1883
3666
|
const linked = addWikilinks(body, titleIndex, page.title);
|
|
1884
3667
|
if (linked === body) return false;
|
|
@@ -1888,18 +3671,18 @@ async function linkPage(page, titleIndex) {
|
|
|
1888
3671
|
}
|
|
1889
3672
|
|
|
1890
3673
|
// src/compiler/indexgen.ts
|
|
1891
|
-
import { readdir as
|
|
1892
|
-
import
|
|
3674
|
+
import { readdir as readdir7 } from "fs/promises";
|
|
3675
|
+
import path28 from "path";
|
|
1893
3676
|
async function generateIndex(root) {
|
|
1894
3677
|
status("*", info("Generating index..."));
|
|
1895
|
-
const conceptsPath =
|
|
1896
|
-
const queriesPath =
|
|
3678
|
+
const conceptsPath = path28.join(root, CONCEPTS_DIR);
|
|
3679
|
+
const queriesPath = path28.join(root, QUERIES_DIR);
|
|
1897
3680
|
const concepts = await collectPageSummaries(conceptsPath);
|
|
1898
3681
|
const queries = await collectPageSummaries(queriesPath);
|
|
1899
3682
|
concepts.sort((a, b) => a.title.localeCompare(b.title));
|
|
1900
3683
|
queries.sort((a, b) => a.title.localeCompare(b.title));
|
|
1901
3684
|
const indexContent = buildIndexContent(concepts, queries);
|
|
1902
|
-
const indexPath =
|
|
3685
|
+
const indexPath = path28.join(root, INDEX_FILE);
|
|
1903
3686
|
await atomicWrite(indexPath, indexContent);
|
|
1904
3687
|
const total = concepts.length + queries.length;
|
|
1905
3688
|
status("+", success(`Index updated with ${total} pages.`));
|
|
@@ -1907,13 +3690,13 @@ async function generateIndex(root) {
|
|
|
1907
3690
|
async function scanWikiPages(dirPath) {
|
|
1908
3691
|
let files;
|
|
1909
3692
|
try {
|
|
1910
|
-
files = await
|
|
3693
|
+
files = await readdir7(dirPath);
|
|
1911
3694
|
} catch {
|
|
1912
3695
|
return [];
|
|
1913
3696
|
}
|
|
1914
3697
|
const scanned = [];
|
|
1915
3698
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
1916
|
-
const content = await safeReadFile(
|
|
3699
|
+
const content = await safeReadFile(path28.join(dirPath, file));
|
|
1917
3700
|
const { meta } = parseFrontmatter(content);
|
|
1918
3701
|
scanned.push({ slug: file.replace(/\.md$/, ""), meta });
|
|
1919
3702
|
}
|
|
@@ -1948,9 +3731,45 @@ function buildIndexContent(concepts, queries) {
|
|
|
1948
3731
|
return lines.join("\n");
|
|
1949
3732
|
}
|
|
1950
3733
|
|
|
3734
|
+
// src/compiler/prompt-budget.ts
|
|
3735
|
+
var TRUNCATION_MARKER = "\n\n[\u2026truncated for prompt budget \u2014 see #39\u2026]";
|
|
3736
|
+
/**
 * Resolve the prompt budget (in characters) from the environment.
 *
 * The override is honoured only when the whole trimmed value is a positive
 * decimal integer. Anything else falls back to DEFAULT_PROMPT_BUDGET_CHARS.
 * A bare `parseInt` is not enough here: it reads "1e6" as 1 (a one-character
 * budget) and accepts trailing garbage such as "100abc".
 *
 * @returns {number} Positive integer character budget.
 */
function resolvePromptBudgetChars() {
  const raw = process.env[PROMPT_BUDGET_ENV_VAR];
  if (!raw) return DEFAULT_PROMPT_BUDGET_CHARS;

  const text = raw.trim();
  if (!/^\d+$/.test(text)) return DEFAULT_PROMPT_BUDGET_CHARS;

  const parsed = Number.parseInt(text, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_PROMPT_BUDGET_CHARS;
  return parsed;
}
|
|
3743
|
+
/**
 * Combine source slices into one prompt section, truncating when the total
 * content length exceeds the prompt budget.
 *
 * Under budget, the slices are formatted untouched. Over budget, the budget
 * is divided evenly across slices (at least 1 char each), a warning is
 * emitted, and every slice longer than its share is cut to that share with
 * TRUNCATION_MARKER appended.
 *
 * @param {string} concept - Concept name, used only in the warning.
 * @param {{ file: string, content: string }[]} slices
 * @returns {string} Formatted, possibly truncated, combined content.
 */
function buildBudgetedCombinedContent(concept, slices) {
  const budget = resolvePromptBudgetChars();

  let totalRaw = 0;
  for (const slice of slices) {
    totalRaw += slice.content.length;
  }
  if (totalRaw <= budget) return formatSlices(slices);

  const perSource = Math.max(1, Math.floor(budget / slices.length));
  warnTruncation(concept, totalRaw, slices.length, perSource, budget);

  const trimmed = slices.map((slice) => {
    if (slice.content.length <= perSource) return slice;
    return { ...slice, content: slice.content.slice(0, perSource) + TRUNCATION_MARKER };
  });
  return formatSlices(trimmed);
}
|
|
3756
|
+
/**
 * Render slices as `--- SOURCE: <file> ---` sections.
 *
 * Each header is followed by a blank line and the slice content; sections
 * are separated from one another by a blank line.
 *
 * @param {{ file: string, content: string }[]} slices
 * @returns {string}
 */
function formatSlices(slices) {
  const sections = [];
  for (const slice of slices) {
    sections.push(`--- SOURCE: ${slice.file} ---\n\n${slice.content}`);
  }
  return sections.join("\n\n");
}
|
|
3761
|
+
/**
 * Emit a "!" status warning that source content is being truncated to fit
 * the prompt budget.
 *
 * @param {string} concept - Concept whose sources are being truncated.
 * @param {number} totalRaw - Combined character count before truncation.
 * @param {number} sourceCount - Number of sources involved.
 * @param {number} perSource - Character allowance per source.
 * @param {number} budget - Overall character budget.
 * @returns {void}
 */
function warnTruncation(concept, totalRaw, sourceCount, perSource, budget) {
  const message = `Combined source content for "${concept}" (${totalRaw.toLocaleString()} chars across ${sourceCount} sources) exceeds the ${budget.toLocaleString()}-char prompt budget; truncating each source to ~${perSource.toLocaleString()} chars. Raise via ${PROMPT_BUDGET_ENV_VAR} when running against larger-context models.`;
  status("!", warn(message));
}
|
|
3769
|
+
|
|
1951
3770
|
// src/compiler/obsidian.ts
|
|
1952
|
-
import { readdir as
|
|
1953
|
-
import
|
|
3771
|
+
import { readdir as readdir8 } from "fs/promises";
|
|
3772
|
+
import path29 from "path";
|
|
1954
3773
|
var ABBREVIATION_MIN_WORDS = 3;
|
|
1955
3774
|
var SWAP_CONJUNCTIONS = [" and ", " or "];
|
|
1956
3775
|
function addObsidianMeta(frontmatter, conceptTitle, tags) {
|
|
@@ -1992,23 +3811,23 @@ function generateAbbreviation(title) {
|
|
|
1992
3811
|
return abbreviation;
|
|
1993
3812
|
}
|
|
1994
3813
|
async function generateMOC(root) {
|
|
1995
|
-
const conceptsPath =
|
|
3814
|
+
const conceptsPath = path29.join(root, CONCEPTS_DIR);
|
|
1996
3815
|
const pages = await loadConceptPages(conceptsPath);
|
|
1997
3816
|
const tagGroups = groupPagesByTag(pages);
|
|
1998
3817
|
const content = buildMOCContent(tagGroups);
|
|
1999
|
-
await atomicWrite(
|
|
3818
|
+
await atomicWrite(path29.join(root, MOC_FILE), content);
|
|
2000
3819
|
}
|
|
2001
3820
|
async function loadConceptPages(conceptsPath) {
|
|
2002
3821
|
let files;
|
|
2003
3822
|
try {
|
|
2004
|
-
files = await
|
|
3823
|
+
files = await readdir8(conceptsPath);
|
|
2005
3824
|
} catch {
|
|
2006
3825
|
return [];
|
|
2007
3826
|
}
|
|
2008
3827
|
const pages = [];
|
|
2009
3828
|
for (const file of files) {
|
|
2010
3829
|
if (!file.endsWith(".md")) continue;
|
|
2011
|
-
const content = await safeReadFile(
|
|
3830
|
+
const content = await safeReadFile(path29.join(conceptsPath, file));
|
|
2012
3831
|
if (!content) continue;
|
|
2013
3832
|
const { meta } = parseFrontmatter(content);
|
|
2014
3833
|
if (meta.orphaned) continue;
|
|
@@ -2059,14 +3878,14 @@ function buildMOCContent(tagGroups) {
|
|
|
2059
3878
|
}
|
|
2060
3879
|
|
|
2061
3880
|
// src/utils/embeddings.ts
|
|
2062
|
-
import { readFile as
|
|
2063
|
-
import { existsSync as
|
|
2064
|
-
import
|
|
3881
|
+
import { readFile as readFile21, readdir as readdir9 } from "fs/promises";
|
|
3882
|
+
import { existsSync as existsSync5 } from "fs";
|
|
3883
|
+
import path30 from "path";
|
|
2065
3884
|
|
|
2066
3885
|
// src/utils/retrieval.ts
|
|
2067
|
-
import { createHash as
|
|
3886
|
+
import { createHash as createHash3 } from "crypto";
|
|
2068
3887
|
function hashChunkText(text) {
|
|
2069
|
-
return
|
|
3888
|
+
return createHash3("sha256").update(text, "utf8").digest("hex").slice(0, 16);
|
|
2070
3889
|
}
|
|
2071
3890
|
function splitIntoChunks(body) {
|
|
2072
3891
|
const paragraphs = extractParagraphs(body);
|
|
@@ -2226,13 +4045,13 @@ function findTopKChunks(queryVec, chunks, k) {
|
|
|
2226
4045
|
return scored.slice(0, k);
|
|
2227
4046
|
}
|
|
2228
4047
|
async function readEmbeddingStore(root) {
|
|
2229
|
-
const filePath =
|
|
2230
|
-
if (!
|
|
2231
|
-
const raw = await
|
|
4048
|
+
const filePath = path30.join(root, EMBEDDINGS_FILE);
|
|
4049
|
+
if (!existsSync5(filePath)) return null;
|
|
4050
|
+
const raw = await readFile21(filePath, "utf-8");
|
|
2232
4051
|
return JSON.parse(raw);
|
|
2233
4052
|
}
|
|
2234
4053
|
async function writeEmbeddingStore(root, store) {
|
|
2235
|
-
const filePath =
|
|
4054
|
+
const filePath = path30.join(root, EMBEDDINGS_FILE);
|
|
2236
4055
|
await atomicWrite(filePath, JSON.stringify(store, null, 2));
|
|
2237
4056
|
}
|
|
2238
4057
|
async function findRelevantPages(root, question) {
|
|
@@ -2264,10 +4083,10 @@ async function loadActiveStore(root, hasContent) {
|
|
|
2264
4083
|
async function collectPageRecords(root) {
|
|
2265
4084
|
const records = [];
|
|
2266
4085
|
for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
|
|
2267
|
-
const absDir =
|
|
4086
|
+
const absDir = path30.join(root, dir);
|
|
2268
4087
|
let files;
|
|
2269
4088
|
try {
|
|
2270
|
-
files = await
|
|
4089
|
+
files = await readdir9(absDir);
|
|
2271
4090
|
} catch {
|
|
2272
4091
|
continue;
|
|
2273
4092
|
}
|
|
@@ -2279,7 +4098,7 @@ async function collectPageRecords(root) {
|
|
|
2279
4098
|
return records;
|
|
2280
4099
|
}
|
|
2281
4100
|
async function readPageRecord(absDir, file) {
|
|
2282
|
-
const content = await safeReadFile(
|
|
4101
|
+
const content = await safeReadFile(path30.join(absDir, file));
|
|
2283
4102
|
const { meta, body } = parseFrontmatter(content);
|
|
2284
4103
|
if (meta.orphaned || typeof meta.title !== "string") return null;
|
|
2285
4104
|
return {
|
|
@@ -2440,114 +4259,10 @@ function shouldRunEmbedding(modelChanged, toEmbed, previousEntries, previousChun
|
|
|
2440
4259
|
return false;
|
|
2441
4260
|
}
|
|
2442
4261
|
|
|
2443
|
-
// src/compiler/candidates.ts
|
|
2444
|
-
import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
|
|
2445
|
-
import { existsSync as existsSync5 } from "fs";
|
|
2446
|
-
import path18 from "path";
|
|
2447
|
-
import { randomBytes } from "crypto";
|
|
2448
|
-
var ID_SUFFIX_BYTES = 4;
|
|
2449
|
-
var CANDIDATE_EXT = ".json";
|
|
2450
|
-
/**
 * Derive a candidate id from a slug by appending a random hex suffix of
 * ID_SUFFIX_BYTES bytes.
 *
 * @param {string} slug
 * @returns {string} `<slug>-<hex>`
 */
function buildCandidateId(slug) {
  const randomHex = randomBytes(ID_SUFFIX_BYTES).toString("hex");
  return `${slug}-${randomHex}`;
}
|
|
2454
|
-
/**
 * Path of the pending-candidate file for `id` under CANDIDATES_DIR.
 *
 * @param {string} root - Project root.
 * @param {string} id - Candidate id.
 * @returns {string}
 */
function candidatePath(root, id) {
  const fileName = `${id}${CANDIDATE_EXT}`;
  return path18.join(root, CANDIDATES_DIR, fileName);
}
|
|
2457
|
-
/**
 * Path of the archived-candidate file for `id` under CANDIDATES_ARCHIVE_DIR.
 *
 * @param {string} root - Project root.
 * @param {string} id - Candidate id.
 * @returns {string}
 */
function archivePath(root, id) {
  const fileName = `${id}${CANDIDATE_EXT}`;
  return path18.join(root, CANDIDATES_ARCHIVE_DIR, fileName);
}
|
|
2460
|
-
/**
 * Persist a draft as a new candidate file and return the stored record.
 *
 * The record gets a freshly generated id and an ISO `generatedAt` stamp.
 * `sourceStates` and `schemaViolations` are copied only when the draft
 * carries a truthy value for them.
 *
 * @param {string} root - Project root.
 * @param {object} draft - Draft with title, slug, summary, sources, body.
 * @returns {Promise<object>} The candidate as written.
 */
async function writeCandidate(root, draft) {
  const candidate = {
    id: buildCandidateId(draft.slug),
    title: draft.title,
    slug: draft.slug,
    summary: draft.summary,
    sources: draft.sources,
    body: draft.body,
    generatedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
  if (draft.sourceStates) {
    candidate.sourceStates = draft.sourceStates;
  }
  if (draft.schemaViolations) {
    candidate.schemaViolations = draft.schemaViolations;
  }

  const serialized = JSON.stringify(candidate, null, 2);
  await atomicWrite(candidatePath(root, candidate.id), serialized);
  return candidate;
}
|
|
2475
|
-
/**
 * Report an error through `status`, mark the process as failed, and hand
 * back null so callers can `return failWithError(...)`.
 *
 * @param {string} message
 * @returns {null}
 */
function failWithError(message) {
  const rendered = error(message);
  status("!", rendered);
  process.exitCode = 1;
  return null;
}
|
|
2480
|
-
/**
 * Load a candidate, reporting "not found" when it cannot be read.
 *
 * @param {string} root - Project root.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The candidate, or null after the failure
 *   has been reported via failWithError.
 */
async function loadCandidateOrFail(root, id) {
  const candidate = await readCandidate(root, id);
  return candidate || failWithError(`Candidate not found: ${id}`);
}
|
|
2485
|
-
/**
 * Load a candidate, reporting that it was removed by another process when
 * it cannot be read.
 *
 * @param {string} root - Project root.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The candidate, or null after the failure
 *   has been reported via failWithError.
 */
async function loadCandidateUnderLockOrFail(root, id) {
  const candidate = await readCandidate(root, id);
  if (candidate) return candidate;
  return failWithError(`Candidate ${id} was removed by another process during review.`);
}
|
|
2492
|
-
/**
 * Read and validate one candidate file.
 *
 * @param {string} root - Project root.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The parsed candidate, or null when the
 *   file yields no content, is not valid JSON, or fails isValidCandidate.
 */
async function readCandidate(root, id) {
  const raw = await safeReadFile(candidatePath(root, id));
  if (!raw) {
    return null;
  }
  try {
    const parsed = JSON.parse(raw);
    return isValidCandidate(parsed) ? parsed : null;
  } catch {
    // Unparseable content is treated the same as a missing candidate.
    return null;
  }
}
|
|
2503
|
-
/**
 * Structural check for a parsed candidate record.
 *
 * Requires string `id`, `title`, `slug`, `body` and `generatedAt`, plus an
 * array `sources`. `generatedAt` is part of the check because
 * listCandidates orders records with `generatedAt.localeCompare(...)`;
 * a record without it would otherwise pass validation and then throw
 * during that sort.
 *
 * @param {unknown} value - Result of JSON.parse on a candidate file.
 * @returns {boolean}
 */
function isValidCandidate(value) {
  if (!value || typeof value !== "object") return false;
  const candidate = value;
  const stringFields = ["id", "title", "slug", "body", "generatedAt"];
  return stringFields.every((field) => typeof candidate[field] === "string") && Array.isArray(candidate.sources);
}
|
|
2508
|
-
/**
 * List every readable candidate under CANDIDATES_DIR, oldest first.
 *
 * Only regular files ending in CANDIDATE_EXT are considered; files that
 * readCandidate rejects are left out. Ordering is by `generatedAt`.
 *
 * @param {string} root - Project root.
 * @returns {Promise<object[]>} Candidates, or [] when the directory is absent.
 */
async function listCandidates(root) {
  const dir = path18.join(root, CANDIDATES_DIR);
  if (!existsSync5(dir)) return [];

  const entries = await readdir6(dir, { withFileTypes: true });
  const ids = entries
    .filter((entry) => entry.isFile() && entry.name.endsWith(CANDIDATE_EXT))
    .map((entry) => entry.name.slice(0, -CANDIDATE_EXT.length));

  const candidates = [];
  for (const id of ids) {
    // Read one file at a time, in directory order.
    const candidate = await readCandidate(root, id);
    if (candidate) candidates.push(candidate);
  }
  return candidates.sort((a, b) => a.generatedAt.localeCompare(b.generatedAt));
}
|
|
2522
|
-
async function countCandidates(root) {
|
|
2523
|
-
const candidates = await listCandidates(root);
|
|
2524
|
-
return candidates.length;
|
|
2525
|
-
}
|
|
2526
|
-
async function deleteCandidate(root, id) {
|
|
2527
|
-
const filePath = candidatePath(root, id);
|
|
2528
|
-
if (!existsSync5(filePath)) return false;
|
|
2529
|
-
await unlink2(filePath);
|
|
2530
|
-
return true;
|
|
2531
|
-
}
|
|
2532
|
-
async function archiveCandidate(root, id) {
|
|
2533
|
-
const sourcePath = candidatePath(root, id);
|
|
2534
|
-
if (!existsSync5(sourcePath)) return false;
|
|
2535
|
-
const target = archivePath(root, id);
|
|
2536
|
-
await mkdir5(path18.dirname(target), { recursive: true });
|
|
2537
|
-
try {
|
|
2538
|
-
await rename3(sourcePath, target);
|
|
2539
|
-
} catch {
|
|
2540
|
-
const raw = await safeReadFile(sourcePath);
|
|
2541
|
-
await writeFile4(target, raw, "utf-8");
|
|
2542
|
-
await unlink2(sourcePath);
|
|
2543
|
-
}
|
|
2544
|
-
return true;
|
|
2545
|
-
}
|
|
2546
|
-
|
|
2547
4262
|
// src/linter/rules.ts
|
|
2548
|
-
import { readdir as
|
|
4263
|
+
import { readdir as readdir10, readFile as readFile22 } from "fs/promises";
|
|
2549
4264
|
import { existsSync as existsSync6 } from "fs";
|
|
2550
|
-
import
|
|
4265
|
+
import path31 from "path";
|
|
2551
4266
|
var MIN_BODY_LENGTH = 50;
|
|
2552
4267
|
var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g;
|
|
2553
4268
|
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
@@ -2564,26 +4279,26 @@ function findMatchesInContent(content, pattern) {
|
|
|
2564
4279
|
}
|
|
2565
4280
|
async function readMarkdownFiles(dirPath) {
|
|
2566
4281
|
if (!existsSync6(dirPath)) return [];
|
|
2567
|
-
const entries = await
|
|
4282
|
+
const entries = await readdir10(dirPath);
|
|
2568
4283
|
const mdFiles = entries.filter((f) => f.endsWith(".md"));
|
|
2569
4284
|
const results = await Promise.all(
|
|
2570
4285
|
mdFiles.map(async (fileName) => {
|
|
2571
|
-
const filePath =
|
|
2572
|
-
const content = await
|
|
4286
|
+
const filePath = path31.join(dirPath, fileName);
|
|
4287
|
+
const content = await readFile22(filePath, "utf-8");
|
|
2573
4288
|
return { filePath, content };
|
|
2574
4289
|
})
|
|
2575
4290
|
);
|
|
2576
4291
|
return results;
|
|
2577
4292
|
}
|
|
2578
4293
|
async function collectAllPages(root) {
|
|
2579
|
-
const conceptPages = await readMarkdownFiles(
|
|
2580
|
-
const queryPages = await readMarkdownFiles(
|
|
4294
|
+
const conceptPages = await readMarkdownFiles(path31.join(root, CONCEPTS_DIR));
|
|
4295
|
+
const queryPages = await readMarkdownFiles(path31.join(root, QUERIES_DIR));
|
|
2581
4296
|
return [...conceptPages, ...queryPages];
|
|
2582
4297
|
}
|
|
2583
4298
|
function buildPageSlugSet(pages) {
|
|
2584
4299
|
const slugs = /* @__PURE__ */ new Set();
|
|
2585
4300
|
for (const page of pages) {
|
|
2586
|
-
const baseName =
|
|
4301
|
+
const baseName = path31.basename(page.filePath, ".md");
|
|
2587
4302
|
slugs.add(baseName.toLowerCase());
|
|
2588
4303
|
}
|
|
2589
4304
|
return slugs;
|
|
@@ -2730,9 +4445,8 @@ async function checkInferredWithoutCitations(root) {
|
|
|
2730
4445
|
const pages = await collectAllPages(root);
|
|
2731
4446
|
const results = [];
|
|
2732
4447
|
for (const page of pages) {
|
|
2733
|
-
const {
|
|
2734
|
-
const
|
|
2735
|
-
const inferred = provenance.inferredParagraphs ?? countUncitedProseParagraphs(body);
|
|
4448
|
+
const { body } = parseFrontmatter(page.content);
|
|
4449
|
+
const inferred = countUncitedProseParagraphs(body);
|
|
2736
4450
|
if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) continue;
|
|
2737
4451
|
results.push({
|
|
2738
4452
|
rule: "excess-inferred-paragraphs",
|
|
@@ -2743,7 +4457,7 @@ async function checkInferredWithoutCitations(root) {
|
|
|
2743
4457
|
}
|
|
2744
4458
|
return results;
|
|
2745
4459
|
}
|
|
2746
|
-
var PROSE_PARAGRAPH_LEAD =
|
|
4460
|
+
var PROSE_PARAGRAPH_LEAD = new RegExp("^\\p{L}", "u");
|
|
2747
4461
|
function countUncitedProseParagraphs(body) {
|
|
2748
4462
|
const paragraphs = body.split(/\n\s*\n/);
|
|
2749
4463
|
let count = 0;
|
|
@@ -2766,18 +4480,7 @@ async function checkSchemaCrossLinks(root, schema) {
|
|
|
2766
4480
|
const pages = await collectAllPages(root);
|
|
2767
4481
|
const results = [];
|
|
2768
4482
|
for (const page of pages) {
|
|
2769
|
-
|
|
2770
|
-
const kind = resolvePageKind(meta.kind, schema);
|
|
2771
|
-
const rule = schema.kinds[kind];
|
|
2772
|
-
if (rule.minWikilinks <= 0) continue;
|
|
2773
|
-
const linkCount = countWikilinks(body);
|
|
2774
|
-
if (linkCount >= rule.minWikilinks) continue;
|
|
2775
|
-
results.push({
|
|
2776
|
-
rule: "schema-cross-link-minimum",
|
|
2777
|
-
severity: "warning",
|
|
2778
|
-
file: page.filePath,
|
|
2779
|
-
message: `Page kind "${kind}" requires at least ${rule.minWikilinks} [[wikilinks]] but only ${linkCount} found.`
|
|
2780
|
-
});
|
|
4483
|
+
results.push(...checkPageCrossLinks(page.content, page.filePath, schema));
|
|
2781
4484
|
}
|
|
2782
4485
|
return results;
|
|
2783
4486
|
}
|
|
@@ -2818,13 +4521,24 @@ function countLines(content) {
|
|
|
2818
4521
|
}
|
|
2819
4522
|
async function checkBrokenCitations(root) {
|
|
2820
4523
|
const pages = await collectAllPages(root);
|
|
2821
|
-
const sourcesDir =
|
|
4524
|
+
const sourcesDir = path31.join(root, SOURCES_DIR);
|
|
2822
4525
|
const results = [];
|
|
2823
4526
|
const lineCountCache = /* @__PURE__ */ new Map();
|
|
2824
4527
|
for (const page of pages) {
|
|
2825
|
-
|
|
2826
|
-
|
|
2827
|
-
|
|
4528
|
+
const pageFindings = await checkPageBrokenCitations(
|
|
4529
|
+
page.content,
|
|
4530
|
+
page.filePath,
|
|
4531
|
+
sourcesDir,
|
|
4532
|
+
lineCountCache
|
|
4533
|
+
);
|
|
4534
|
+
results.push(...pageFindings);
|
|
4535
|
+
}
|
|
4536
|
+
return results;
|
|
4537
|
+
}
|
|
4538
|
+
async function checkPageBrokenCitations(content, filePath, sourcesDir, lineCountCache = /* @__PURE__ */ new Map()) {
|
|
4539
|
+
const results = [];
|
|
4540
|
+
for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
|
|
4541
|
+
await collectBrokenForMarker(captured, line, filePath, sourcesDir, lineCountCache, results);
|
|
2828
4542
|
}
|
|
2829
4543
|
return results;
|
|
2830
4544
|
}
|
|
@@ -2833,7 +4547,7 @@ async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, line
|
|
|
2833
4547
|
const trimmed = part.trim();
|
|
2834
4548
|
if (trimmed.length === 0) continue;
|
|
2835
4549
|
const filename = stripSpanSuffix(trimmed);
|
|
2836
|
-
const citedPath =
|
|
4550
|
+
const citedPath = path31.join(sourcesDir, filename);
|
|
2837
4551
|
if (!existsSync6(citedPath)) {
|
|
2838
4552
|
out.push({
|
|
2839
4553
|
rule: "broken-citation",
|
|
@@ -2869,25 +4583,30 @@ async function checkMalformedClaimCitations(root) {
|
|
|
2869
4583
|
const pages = await collectAllPages(root);
|
|
2870
4584
|
const results = [];
|
|
2871
4585
|
for (const page of pages) {
|
|
2872
|
-
|
|
2873
|
-
|
|
2874
|
-
|
|
2875
|
-
|
|
2876
|
-
|
|
2877
|
-
|
|
2878
|
-
|
|
2879
|
-
|
|
2880
|
-
|
|
2881
|
-
|
|
2882
|
-
|
|
4586
|
+
results.push(...checkPageMalformedCitations(page.content, page.filePath));
|
|
4587
|
+
}
|
|
4588
|
+
return results;
|
|
4589
|
+
}
|
|
4590
|
+
function checkPageMalformedCitations(content, filePath) {
|
|
4591
|
+
const results = [];
|
|
4592
|
+
for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
|
|
4593
|
+
for (const part of captured.split(",")) {
|
|
4594
|
+
if (!isMalformedCitationEntry(part)) continue;
|
|
4595
|
+
results.push({
|
|
4596
|
+
rule: "malformed-claim-citation",
|
|
4597
|
+
severity: "error",
|
|
4598
|
+
file: filePath,
|
|
4599
|
+
message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
|
|
4600
|
+
line
|
|
4601
|
+
});
|
|
2883
4602
|
}
|
|
2884
4603
|
}
|
|
2885
4604
|
return results;
|
|
2886
4605
|
}
|
|
2887
4606
|
|
|
2888
4607
|
// src/compiler/page-renderer.ts
|
|
2889
|
-
import { readdir as
|
|
2890
|
-
import
|
|
4608
|
+
import { readdir as readdir11 } from "fs/promises";
|
|
4609
|
+
import path32 from "path";
|
|
2891
4610
|
|
|
2892
4611
|
// src/compiler/provenance.ts
|
|
2893
4612
|
function addProvenanceMeta(fields, concept) {
|
|
@@ -2900,9 +4619,6 @@ function addProvenanceMeta(fields, concept) {
|
|
|
2900
4619
|
if (concept.contradictedBy && concept.contradictedBy.length > 0) {
|
|
2901
4620
|
fields.contradictedBy = concept.contradictedBy;
|
|
2902
4621
|
}
|
|
2903
|
-
if (typeof concept.inferredParagraphs === "number") {
|
|
2904
|
-
fields.inferredParagraphs = concept.inferredParagraphs;
|
|
2905
|
-
}
|
|
2906
4622
|
}
|
|
2907
4623
|
function reportContradictionWarnings(conceptTitle, concept) {
|
|
2908
4624
|
const refs = concept.contradictedBy;
|
|
@@ -2917,7 +4633,7 @@ function reportContradictionWarnings(conceptTitle, concept) {
|
|
|
2917
4633
|
// src/compiler/page-renderer.ts
|
|
2918
4634
|
var RELATED_PAGE_CONTEXT_LIMIT = 5;
|
|
2919
4635
|
async function renderMergedPageContent(root, entry, schema) {
|
|
2920
|
-
const pagePath =
|
|
4636
|
+
const pagePath = path32.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
2921
4637
|
const existingPage = await safeReadFile(pagePath);
|
|
2922
4638
|
const relatedPages = await loadRelatedPages(root, entry.slug);
|
|
2923
4639
|
const system = buildPagePrompt(
|
|
@@ -2956,17 +4672,17 @@ function buildMergedFrontmatter(entry, existingPage, schema) {
|
|
|
2956
4672
|
return buildFrontmatter(frontmatterFields);
|
|
2957
4673
|
}
|
|
2958
4674
|
async function loadRelatedPages(root, excludeSlug) {
|
|
2959
|
-
const conceptsPath =
|
|
4675
|
+
const conceptsPath = path32.join(root, CONCEPTS_DIR);
|
|
2960
4676
|
let files;
|
|
2961
4677
|
try {
|
|
2962
|
-
files = await
|
|
4678
|
+
files = await readdir11(conceptsPath);
|
|
2963
4679
|
} catch {
|
|
2964
4680
|
return "";
|
|
2965
4681
|
}
|
|
2966
4682
|
const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, RELATED_PAGE_CONTEXT_LIMIT);
|
|
2967
4683
|
const contents = [];
|
|
2968
4684
|
for (const f of related) {
|
|
2969
|
-
const content = await safeReadFile(
|
|
4685
|
+
const content = await safeReadFile(path32.join(conceptsPath, f));
|
|
2970
4686
|
if (!content) continue;
|
|
2971
4687
|
const { meta } = parseFrontmatter(content);
|
|
2972
4688
|
if (meta.orphaned) continue;
|
|
@@ -3020,7 +4736,7 @@ async function generatePagesPhase(root, extractions, frozenSlugs, schema, option
|
|
|
3020
4736
|
return entry;
|
|
3021
4737
|
}))
|
|
3022
4738
|
);
|
|
3023
|
-
return { pages, errors, candidates };
|
|
4739
|
+
return { pages, errors, candidates, seedSlugs: [] };
|
|
3024
4740
|
}
|
|
3025
4741
|
async function persistExtractionStates(root, extractions) {
|
|
3026
4742
|
for (const result of extractions) {
|
|
@@ -3046,12 +4762,13 @@ function summarizeCompile(buckets, generation, extractions, options) {
|
|
|
3046
4762
|
errors.push(`No concepts extracted from ${result.sourceFile}`);
|
|
3047
4763
|
}
|
|
3048
4764
|
}
|
|
4765
|
+
const conceptSlugs = generation.pages.map((entry) => entry.slug);
|
|
3049
4766
|
const baseResult = {
|
|
3050
4767
|
compiled: buckets.toCompile.length,
|
|
3051
4768
|
skipped: buckets.unchanged.length,
|
|
3052
4769
|
deleted: buckets.deleted.length,
|
|
3053
4770
|
concepts: generation.pages.map((entry) => entry.concept.concept),
|
|
3054
|
-
pages: generation.
|
|
4771
|
+
pages: [...conceptSlugs, ...generation.seedSlugs],
|
|
3055
4772
|
errors
|
|
3056
4773
|
};
|
|
3057
4774
|
if (options.review) {
|
|
@@ -3069,12 +4786,21 @@ async function runCompilePipeline(root, options) {
|
|
|
3069
4786
|
if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
|
|
3070
4787
|
status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
|
|
3071
4788
|
if (!options.review) {
|
|
3072
|
-
const emptyGeneration = {
|
|
4789
|
+
const emptyGeneration = {
|
|
4790
|
+
pages: [],
|
|
4791
|
+
errors: [],
|
|
4792
|
+
candidates: [],
|
|
4793
|
+
seedSlugs: []
|
|
4794
|
+
};
|
|
3073
4795
|
await generateSeedPages(root, schema, emptyGeneration);
|
|
3074
|
-
await finalizeWiki(root, emptyGeneration.pages);
|
|
4796
|
+
await finalizeWiki(root, emptyGeneration.pages, emptyGeneration.seedSlugs);
|
|
3075
4797
|
return {
|
|
3076
4798
|
...emptyCompileResult(),
|
|
3077
4799
|
skipped: buckets.unchanged.length,
|
|
4800
|
+
// Surface seed-page slugs alongside any errors so downstream
|
|
4801
|
+
// consumers (MCP, embeddings, programmatic callers) can see what
|
|
4802
|
+
// landed even on the no-source-changes early-return path.
|
|
4803
|
+
pages: [...emptyGeneration.seedSlugs],
|
|
3078
4804
|
errors: emptyGeneration.errors
|
|
3079
4805
|
};
|
|
3080
4806
|
}
|
|
@@ -3098,7 +4824,7 @@ async function runCompilePipeline(root, options) {
|
|
|
3098
4824
|
}
|
|
3099
4825
|
await persistFrozenSlugs(root, frozenSlugs, extractions);
|
|
3100
4826
|
await generateSeedPages(root, schema, generation);
|
|
3101
|
-
await finalizeWiki(root, generation.pages);
|
|
4827
|
+
await finalizeWiki(root, generation.pages, generation.seedSlugs);
|
|
3102
4828
|
}
|
|
3103
4829
|
return summarizeCompile(buckets, generation, extractions, options);
|
|
3104
4830
|
}
|
|
@@ -3135,9 +4861,11 @@ async function runExtractionPhases(root, toCompile, state, allChanges) {
|
|
|
3135
4861
|
}
|
|
3136
4862
|
return extractions;
|
|
3137
4863
|
}
|
|
3138
|
-
async function finalizeWiki(root, pages) {
|
|
3139
|
-
const
|
|
3140
|
-
const
|
|
4864
|
+
async function finalizeWiki(root, pages, seedSlugs = []) {
|
|
4865
|
+
const conceptChangedSlugs = pages.map((entry) => entry.slug);
|
|
4866
|
+
const conceptNewSlugs = pages.filter((entry) => entry.concept.is_new).map((entry) => entry.slug);
|
|
4867
|
+
const allChangedSlugs = [...conceptChangedSlugs, ...seedSlugs];
|
|
4868
|
+
const allNewSlugs = [...conceptNewSlugs, ...seedSlugs];
|
|
3141
4869
|
if (allChangedSlugs.length > 0) {
|
|
3142
4870
|
status("\u{1F517}", info("Resolving interlinks..."));
|
|
3143
4871
|
await resolveLinks(root, allChangedSlugs, allNewSlugs);
|
|
@@ -3167,9 +4895,9 @@ function printChangesSummary(changes) {
|
|
|
3167
4895
|
}
|
|
3168
4896
|
async function extractForSource(root, sourceFile) {
|
|
3169
4897
|
status("*", info(`Extracting: ${sourceFile}`));
|
|
3170
|
-
const sourcePath =
|
|
3171
|
-
const sourceContent = await
|
|
3172
|
-
const existingIndex = await safeReadFile(
|
|
4898
|
+
const sourcePath = path33.join(root, SOURCES_DIR, sourceFile);
|
|
4899
|
+
const sourceContent = await readFile23(sourcePath, "utf-8");
|
|
4900
|
+
const existingIndex = await safeReadFile(path33.join(root, INDEX_FILE));
|
|
3173
4901
|
const concepts = await extractConcepts(sourceContent, existingIndex);
|
|
3174
4902
|
if (concepts.length > 0) {
|
|
3175
4903
|
const names = concepts.map((c) => c.concept).join(", ");
|
|
@@ -3192,13 +4920,11 @@ function reconcileConceptMetadata(existing, incoming) {
|
|
|
3192
4920
|
}
|
|
3193
4921
|
}
|
|
3194
4922
|
reconciled.contradictedBy = refs.length > 0 ? refs : void 0;
|
|
3195
|
-
if (typeof incoming.inferredParagraphs === "number") {
|
|
3196
|
-
reconciled.inferredParagraphs = typeof existing.inferredParagraphs === "number" ? Math.max(existing.inferredParagraphs, incoming.inferredParagraphs) : incoming.inferredParagraphs;
|
|
3197
|
-
}
|
|
3198
4923
|
return reconciled;
|
|
3199
4924
|
}
|
|
3200
4925
|
function mergeExtractions(extractions, frozenSlugs) {
|
|
3201
4926
|
const bySlug = /* @__PURE__ */ new Map();
|
|
4927
|
+
const slicesBySlug = /* @__PURE__ */ new Map();
|
|
3202
4928
|
for (const result of extractions) {
|
|
3203
4929
|
if (result.concepts.length === 0) continue;
|
|
3204
4930
|
for (const concept of result.concepts) {
|
|
@@ -3208,23 +4934,28 @@ function mergeExtractions(extractions, frozenSlugs) {
|
|
|
3208
4934
|
if (existing) {
|
|
3209
4935
|
existing.concept = reconcileConceptMetadata(existing.concept, concept);
|
|
3210
4936
|
existing.sourceFiles.push(result.sourceFile);
|
|
3211
|
-
existing.combinedContent += `
|
|
3212
|
-
|
|
3213
|
-
--- SOURCE: ${result.sourceFile} ---
|
|
3214
|
-
|
|
3215
|
-
${result.sourceContent}`;
|
|
3216
4937
|
} else {
|
|
3217
4938
|
bySlug.set(slug, {
|
|
3218
4939
|
slug,
|
|
3219
4940
|
concept,
|
|
3220
4941
|
sourceFiles: [result.sourceFile],
|
|
3221
|
-
combinedContent:
|
|
3222
|
-
|
|
3223
|
-
${result.sourceContent}`
|
|
4942
|
+
combinedContent: ""
|
|
3224
4943
|
});
|
|
4944
|
+
slicesBySlug.set(slug, []);
|
|
3225
4945
|
}
|
|
4946
|
+
slicesBySlug.get(slug).push({
|
|
4947
|
+
file: result.sourceFile,
|
|
4948
|
+
content: result.sourceContent
|
|
4949
|
+
});
|
|
3226
4950
|
}
|
|
3227
4951
|
}
|
|
4952
|
+
for (const merged of bySlug.values()) {
|
|
4953
|
+
const slices = slicesBySlug.get(merged.slug) ?? [];
|
|
4954
|
+
merged.combinedContent = buildBudgetedCombinedContent(
|
|
4955
|
+
merged.concept.concept,
|
|
4956
|
+
slices
|
|
4957
|
+
);
|
|
4958
|
+
}
|
|
3228
4959
|
return Array.from(bySlug.values());
|
|
3229
4960
|
}
|
|
3230
4961
|
async function generateMergedPage(root, entry, schema, options, sourceStates) {
|
|
@@ -3232,13 +4963,18 @@ async function generateMergedPage(root, entry, schema, options, sourceStates) {
|
|
|
3232
4963
|
if (options.review) {
|
|
3233
4964
|
return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
|
|
3234
4965
|
}
|
|
3235
|
-
const pagePath =
|
|
4966
|
+
const pagePath = path33.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
3236
4967
|
const error2 = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
3237
4968
|
return { error: error2 ?? void 0 };
|
|
3238
4969
|
}
|
|
3239
4970
|
async function persistReviewCandidate(root, entry, fullPage, sourceStates, schema) {
|
|
3240
4971
|
const virtualPath = `wiki/concepts/${entry.slug}.md`;
|
|
3241
|
-
const
|
|
4972
|
+
const schemaViolations = checkPageCrossLinks(fullPage, virtualPath, schema);
|
|
4973
|
+
const provenanceViolations = await collectCandidateProvenanceViolations(
|
|
4974
|
+
root,
|
|
4975
|
+
fullPage,
|
|
4976
|
+
virtualPath
|
|
4977
|
+
);
|
|
3242
4978
|
const candidate = await writeCandidate(root, {
|
|
3243
4979
|
title: entry.concept.concept,
|
|
3244
4980
|
slug: entry.slug,
|
|
@@ -3246,21 +4982,35 @@ async function persistReviewCandidate(root, entry, fullPage, sourceStates, schem
|
|
|
3246
4982
|
sources: entry.sourceFiles,
|
|
3247
4983
|
body: fullPage,
|
|
3248
4984
|
sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
|
|
3249
|
-
schemaViolations:
|
|
4985
|
+
schemaViolations: schemaViolations.length > 0 ? schemaViolations : void 0,
|
|
4986
|
+
provenanceViolations: provenanceViolations.length > 0 ? provenanceViolations : void 0
|
|
3250
4987
|
});
|
|
3251
4988
|
status("?", info(`Candidate ready: ${candidate.id} (${entry.slug})`));
|
|
3252
4989
|
return { candidateId: candidate.id };
|
|
3253
4990
|
}
|
|
4991
|
+
async function collectCandidateProvenanceViolations(root, fullPage, virtualPath) {
|
|
4992
|
+
const malformed = checkPageMalformedCitations(fullPage, virtualPath);
|
|
4993
|
+
const broken = await checkPageBrokenCitations(
|
|
4994
|
+
fullPage,
|
|
4995
|
+
virtualPath,
|
|
4996
|
+
path33.join(root, SOURCES_DIR)
|
|
4997
|
+
);
|
|
4998
|
+
return [...malformed, ...broken];
|
|
4999
|
+
}
|
|
3254
5000
|
async function generateSeedPages(root, schema, generation) {
|
|
3255
5001
|
if (schema.seedPages.length === 0) return;
|
|
3256
5002
|
for (const seed of schema.seedPages) {
|
|
3257
|
-
const
|
|
3258
|
-
if (
|
|
5003
|
+
const result = await generateSingleSeedPage(root, schema, seed);
|
|
5004
|
+
if (result.error) {
|
|
5005
|
+
generation.errors.push(result.error);
|
|
5006
|
+
continue;
|
|
5007
|
+
}
|
|
5008
|
+
generation.seedSlugs.push(result.slug);
|
|
3259
5009
|
}
|
|
3260
5010
|
}
|
|
3261
5011
|
async function generateSingleSeedPage(root, schema, seed) {
|
|
3262
5012
|
const slug = slugify(seed.title);
|
|
3263
|
-
const pagePath =
|
|
5013
|
+
const pagePath = path33.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
3264
5014
|
const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
|
|
3265
5015
|
const rule = schema.kinds[seed.kind];
|
|
3266
5016
|
const system = buildSeedPagePrompt(seed, rule, relatedContent);
|
|
@@ -3283,16 +5033,17 @@ async function generateSingleSeedPage(root, schema, seed) {
|
|
|
3283
5033
|
const frontmatterFields = { ...typedFields };
|
|
3284
5034
|
addObsidianMeta(frontmatterFields, seed.title, []);
|
|
3285
5035
|
const frontmatter = buildFrontmatter(frontmatterFields);
|
|
3286
|
-
|
|
5036
|
+
const error2 = await writePageIfValid(pagePath, `${frontmatter}
|
|
3287
5037
|
|
|
3288
5038
|
${pageBody}
|
|
3289
5039
|
`, seed.title);
|
|
5040
|
+
return error2 ? { slug, error: error2 } : { slug };
|
|
3290
5041
|
}
|
|
3291
5042
|
async function loadSeedRelatedPages(root, slugs) {
|
|
3292
5043
|
if (slugs.length === 0) return "";
|
|
3293
5044
|
const contents = [];
|
|
3294
5045
|
for (const slug of slugs) {
|
|
3295
|
-
const pagePath =
|
|
5046
|
+
const pagePath = path33.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
3296
5047
|
const content = await safeReadFile(pagePath);
|
|
3297
5048
|
if (content) contents.push(content);
|
|
3298
5049
|
}
|
|
@@ -3347,7 +5098,7 @@ async function compileCommand(options = {}) {
|
|
|
3347
5098
|
|
|
3348
5099
|
// src/commands/query.ts
|
|
3349
5100
|
import { existsSync as existsSync8 } from "fs";
|
|
3350
|
-
import
|
|
5101
|
+
import path34 from "path";
|
|
3351
5102
|
var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
|
|
3352
5103
|
var PAGE_SELECTION_TOOL = {
|
|
3353
5104
|
name: "select_pages",
|
|
@@ -3404,7 +5155,7 @@ async function selectRelevantPages(root, question, debug) {
|
|
|
3404
5155
|
const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
|
|
3405
5156
|
return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2, chunks: [] };
|
|
3406
5157
|
}
|
|
3407
|
-
const indexContent = await safeReadFile(
|
|
5158
|
+
const indexContent = await safeReadFile(path34.join(root, INDEX_FILE));
|
|
3408
5159
|
const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
|
|
3409
5160
|
return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning, chunks: [] };
|
|
3410
5161
|
}
|
|
@@ -3496,7 +5247,7 @@ async function loadSelectedPages(root, slugs) {
|
|
|
3496
5247
|
for (const slug of slugs) {
|
|
3497
5248
|
let content = "";
|
|
3498
5249
|
for (const dir of PAGE_DIRS) {
|
|
3499
|
-
const candidate = await safeReadFile(
|
|
5250
|
+
const candidate = await safeReadFile(path34.join(root, dir, `${slug}.md`));
|
|
3500
5251
|
if (!candidate) continue;
|
|
3501
5252
|
const { meta } = parseFrontmatter(candidate);
|
|
3502
5253
|
if (meta.orphaned) continue;
|
|
@@ -3512,7 +5263,11 @@ ${content}`);
|
|
|
3512
5263
|
}
|
|
3513
5264
|
return sections.join("\n\n");
|
|
3514
5265
|
}
|
|
3515
|
-
var
|
|
5266
|
+
var ANSWER_SYSTEM_PROMPT_BASE = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
|
|
5267
|
+
function buildAnswerSystemPrompt() {
|
|
5268
|
+
const lang = languageDirective();
|
|
5269
|
+
return lang ? `${ANSWER_SYSTEM_PROMPT_BASE} ${lang}` : ANSWER_SYSTEM_PROMPT_BASE;
|
|
5270
|
+
}
|
|
3516
5271
|
async function callAnswerLLM(question, pagesContent, chunks, onToken) {
|
|
3517
5272
|
const provenance = chunks.length > 0 ? buildChunkProvenance(chunks) : "";
|
|
3518
5273
|
const userMessage = `Question: ${question}
|
|
@@ -3520,7 +5275,7 @@ async function callAnswerLLM(question, pagesContent, chunks, onToken) {
|
|
|
3520
5275
|
Relevant wiki pages:
|
|
3521
5276
|
${pagesContent}${provenance}`;
|
|
3522
5277
|
return callClaude({
|
|
3523
|
-
system:
|
|
5278
|
+
system: buildAnswerSystemPrompt(),
|
|
3524
5279
|
messages: [{ role: "user", content: userMessage }],
|
|
3525
5280
|
stream: Boolean(onToken),
|
|
3526
5281
|
onToken
|
|
@@ -3543,7 +5298,7 @@ function summarizeAnswer(answer) {
|
|
|
3543
5298
|
}
|
|
3544
5299
|
async function saveQueryPage(root, question, answer) {
|
|
3545
5300
|
const slug = slugify(question);
|
|
3546
|
-
const filePath =
|
|
5301
|
+
const filePath = path34.join(root, QUERIES_DIR, `${slug}.md`);
|
|
3547
5302
|
const frontmatter = buildFrontmatter({
|
|
3548
5303
|
title: question,
|
|
3549
5304
|
summary: summarizeAnswer(answer),
|
|
@@ -3569,7 +5324,7 @@ ${answer}
|
|
|
3569
5324
|
return slug;
|
|
3570
5325
|
}
|
|
3571
5326
|
async function generateAnswer(root, question, options = {}) {
|
|
3572
|
-
if (!existsSync8(
|
|
5327
|
+
if (!existsSync8(path34.join(root, INDEX_FILE))) {
|
|
3573
5328
|
throw new Error("Wiki index not found. Run `llmwiki compile` first.");
|
|
3574
5329
|
}
|
|
3575
5330
|
const selection = await selectRelevantPages(root, question, Boolean(options.debug));
|
|
@@ -3597,7 +5352,7 @@ function buildEmptyResult(selection) {
|
|
|
3597
5352
|
};
|
|
3598
5353
|
}
|
|
3599
5354
|
async function queryCommand(root, question, options) {
|
|
3600
|
-
if (!existsSync8(
|
|
5355
|
+
if (!existsSync8(path34.join(root, INDEX_FILE))) {
|
|
3601
5356
|
status("!", error("Wiki index not found. Run `llmwiki compile` first."));
|
|
3602
5357
|
return;
|
|
3603
5358
|
}
|
|
@@ -3648,10 +5403,10 @@ var DEBUG_CHUNK_PREVIEW_CHARS = 120;
|
|
|
3648
5403
|
// src/commands/watch.ts
|
|
3649
5404
|
import { watch as chokidarWatch } from "chokidar";
|
|
3650
5405
|
import { existsSync as existsSync9 } from "fs";
|
|
3651
|
-
import
|
|
5406
|
+
import path35 from "path";
|
|
3652
5407
|
var DEBOUNCE_MS = 500;
|
|
3653
5408
|
async function watchCommand() {
|
|
3654
|
-
const sourcesPath =
|
|
5409
|
+
const sourcesPath = path35.resolve(SOURCES_DIR);
|
|
3655
5410
|
if (!existsSync9(sourcesPath)) {
|
|
3656
5411
|
status(
|
|
3657
5412
|
"!",
|
|
@@ -3686,7 +5441,7 @@ async function watchCommand() {
|
|
|
3686
5441
|
const scheduleCompile = (eventPath, event) => {
|
|
3687
5442
|
status(
|
|
3688
5443
|
"~",
|
|
3689
|
-
dim(`${event}: ${
|
|
5444
|
+
dim(`${event}: ${path35.basename(eventPath)}`)
|
|
3690
5445
|
);
|
|
3691
5446
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
3692
5447
|
debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
|
|
@@ -3765,15 +5520,356 @@ async function lintCommand() {
|
|
|
3765
5520
|
info(`${summary.info} info`)
|
|
3766
5521
|
].join(", ");
|
|
3767
5522
|
status("*", summaryLine);
|
|
5523
|
+
await writeLintCache(process.cwd(), summary);
|
|
3768
5524
|
if (summary.errors > 0) {
|
|
3769
5525
|
process.exit(1);
|
|
3770
5526
|
}
|
|
3771
5527
|
}
|
|
3772
5528
|
|
|
5529
|
+
// src/commands/export.ts
|
|
5530
|
+
import path36 from "path";
|
|
5531
|
+
import { createRequire } from "module";
|
|
5532
|
+
|
|
5533
|
+
// src/export/collect.ts
|
|
5534
|
+
function toExportPage(raw) {
|
|
5535
|
+
const meta = raw.frontmatter;
|
|
5536
|
+
return {
|
|
5537
|
+
title: raw.title,
|
|
5538
|
+
slug: raw.slug,
|
|
5539
|
+
pageDirectory: raw.pageDirectory,
|
|
5540
|
+
summary: typeof meta.summary === "string" ? meta.summary : "",
|
|
5541
|
+
sources: Array.isArray(meta.sources) ? meta.sources.filter((s) => typeof s === "string") : [],
|
|
5542
|
+
tags: Array.isArray(meta.tags) ? meta.tags.filter((t) => typeof t === "string") : [],
|
|
5543
|
+
createdAt: typeof meta.createdAt === "string" ? meta.createdAt : (/* @__PURE__ */ new Date()).toISOString(),
|
|
5544
|
+
updatedAt: typeof meta.updatedAt === "string" ? meta.updatedAt : (/* @__PURE__ */ new Date()).toISOString(),
|
|
5545
|
+
links: extractWikilinkSlugs(raw.body),
|
|
5546
|
+
body: raw.body
|
|
5547
|
+
};
|
|
5548
|
+
}
|
|
5549
|
+
async function collectExportPages(root) {
|
|
5550
|
+
const raw = await collectRawWikiPages(root);
|
|
5551
|
+
const kept = raw.filter((page) => page.parseStatus.hasTitle && !page.parseStatus.orphaned);
|
|
5552
|
+
const pages = kept.map(toExportPage);
|
|
5553
|
+
pages.sort((a, b) => a.title.localeCompare(b.title));
|
|
5554
|
+
return pages;
|
|
5555
|
+
}
|
|
5556
|
+
|
|
5557
|
+
// src/export/llms-txt.ts
|
|
5558
|
+
function pageRelativePath(page) {
|
|
5559
|
+
return `wiki/${page.pageDirectory}/${page.slug}.md`;
|
|
5560
|
+
}
|
|
5561
|
+
function buildEntryNote(page) {
|
|
5562
|
+
const parts = [];
|
|
5563
|
+
if (page.summary) parts.push(page.summary);
|
|
5564
|
+
if (page.tags.length > 0) parts.push(`tags: ${page.tags.join(", ")}`);
|
|
5565
|
+
if (page.sources.length > 0) parts.push(`sources: ${page.sources.join(", ")}`);
|
|
5566
|
+
parts.push(`created: ${page.createdAt}`);
|
|
5567
|
+
parts.push(`updated: ${page.updatedAt}`);
|
|
5568
|
+
return parts.join(" | ");
|
|
5569
|
+
}
|
|
5570
|
+
function formatPageEntry(page) {
|
|
5571
|
+
const note = buildEntryNote(page);
|
|
5572
|
+
return `- [${page.title}](${pageRelativePath(page)}): ${note}`;
|
|
5573
|
+
}
|
|
5574
|
+
function buildSection(heading, pages) {
|
|
5575
|
+
if (pages.length === 0) return [];
|
|
5576
|
+
return [`## ${heading}`, "", ...pages.map(formatPageEntry), ""];
|
|
5577
|
+
}
|
|
5578
|
+
function buildLlmsTxt(pages, projectTitle) {
|
|
5579
|
+
const concepts = pages.filter((p) => p.pageDirectory === "concepts");
|
|
5580
|
+
const queries = pages.filter((p) => p.pageDirectory === "queries");
|
|
5581
|
+
const lines = [
|
|
5582
|
+
`# ${projectTitle}`,
|
|
5583
|
+
"",
|
|
5584
|
+
`> ${pages.length} pages \u2014 exported ${(/* @__PURE__ */ new Date()).toISOString()}`,
|
|
5585
|
+
"",
|
|
5586
|
+
...buildSection("Concepts", concepts),
|
|
5587
|
+
...buildSection("Saved Queries", queries)
|
|
5588
|
+
];
|
|
5589
|
+
return lines.join("\n");
|
|
5590
|
+
}
|
|
5591
|
+
function buildLlmsFullTxt(pages, projectTitle) {
|
|
5592
|
+
const sections = [buildLlmsTxt(pages, projectTitle)];
|
|
5593
|
+
for (const page of pages) {
|
|
5594
|
+
const tags = page.tags.length > 0 ? `
|
|
5595
|
+
Tags: ${page.tags.join(", ")}` : "";
|
|
5596
|
+
const sources = page.sources.length > 0 ? `
|
|
5597
|
+
Sources: ${page.sources.join(", ")}` : "";
|
|
5598
|
+
const header2 = [
|
|
5599
|
+
"---",
|
|
5600
|
+
`## ${page.title}`,
|
|
5601
|
+
`> ${page.summary}${tags}${sources}`,
|
|
5602
|
+
`Created: ${page.createdAt} | Updated: ${page.updatedAt}`,
|
|
5603
|
+
""
|
|
5604
|
+
].join("\n");
|
|
5605
|
+
sections.push(`${header2}
|
|
5606
|
+
${page.body.trim()}
|
|
5607
|
+
`);
|
|
5608
|
+
}
|
|
5609
|
+
return sections.join("\n");
|
|
5610
|
+
}
|
|
5611
|
+
|
|
5612
|
+
// src/export/json-export.ts
|
|
5613
|
+
function buildJsonExport(pages) {
|
|
5614
|
+
const doc = {
|
|
5615
|
+
exportedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5616
|
+
pageCount: pages.length,
|
|
5617
|
+
pages
|
|
5618
|
+
};
|
|
5619
|
+
return JSON.stringify(doc, null, 2);
|
|
5620
|
+
}
|
|
5621
|
+
|
|
5622
|
+
// src/export/json-ld.ts
|
|
5623
|
+
var LOCAL_BASE = "urn:llmwiki:";
|
|
5624
|
+
function pageIri(slug) {
|
|
5625
|
+
return `${LOCAL_BASE}${slug}`;
|
|
5626
|
+
}
|
|
5627
|
+
function pageToJsonLd(page) {
|
|
5628
|
+
const node = {
|
|
5629
|
+
"@id": pageIri(page.slug),
|
|
5630
|
+
"@type": "Article",
|
|
5631
|
+
name: page.title,
|
|
5632
|
+
description: page.summary,
|
|
5633
|
+
dateCreated: page.createdAt,
|
|
5634
|
+
dateModified: page.updatedAt
|
|
5635
|
+
};
|
|
5636
|
+
if (page.tags.length > 0) {
|
|
5637
|
+
node["keywords"] = page.tags;
|
|
5638
|
+
}
|
|
5639
|
+
if (page.sources.length > 0) {
|
|
5640
|
+
node["isBasedOn"] = page.sources;
|
|
5641
|
+
}
|
|
5642
|
+
if (page.links.length > 0) {
|
|
5643
|
+
node["mentions"] = page.links.map((slug) => ({ "@id": pageIri(slug) }));
|
|
5644
|
+
}
|
|
5645
|
+
return node;
|
|
5646
|
+
}
|
|
5647
|
+
/**
 * Serialize all pages as a single JSON-LD document.
 *
 * @param {Array<object>} pages - Pages converted with pageToJsonLd.
 * @returns {string} Pretty-printed JSON with a schema.org `@context` and the
 *   page nodes under `@graph`.
 */
function buildJsonLd(pages) {
  const graph = pages.map(pageToJsonLd);
  return JSON.stringify({ "@context": "https://schema.org", "@graph": graph }, null, 2);
}
|
|
5654
|
+
|
|
5655
|
+
// src/export/graphml.ts
|
|
5656
|
+
// The five XML predefined entities, keyed by the character they replace.
var XML_ESCAPES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;"
};
/**
 * Escape a value for safe use in XML text or attribute content.
 *
 * @param {*} value - Value to escape; null/undefined become an empty string
 *   and any other non-string is converted with String().
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeXml(value) {
  return String(value ?? "").replace(/[&<>"']/g, (ch) => XML_ESCAPES[ch] ?? ch);
}
|
|
5666
|
+
// GraphML <key> declarations, one per node attribute emitted by the export.
// Every attribute is declared as a string-typed node attribute.
var KEY_DEFS = ["title", "summary", "tags", "sources", "createdAt", "updatedAt"].map(
  (field) => `<key id="${field}" for="node" attr.name="${field}" attr.type="string"/>`
).join("\n ");
|
|
5674
|
+
/**
 * Render one page as a GraphML <node> element with its data attributes.
 *
 * @param {object} page - Page with slug, title, summary, tags, sources,
 *   createdAt and updatedAt; tags and sources are joined with ", ".
 * @returns {string} The node element, one line per child, joined by newlines.
 */
function pageToNode(page) {
  const dataLine = (key, value) => ` <data key="${key}">${escapeXml(value)}</data>`;
  const lines = [
    ` <node id="${escapeXml(page.slug)}">`,
    dataLine("title", page.title),
    dataLine("summary", page.summary),
    dataLine("tags", page.tags.join(", ")),
    dataLine("sources", page.sources.join(", ")),
    dataLine("createdAt", page.createdAt),
    dataLine("updatedAt", page.updatedAt),
    ` </node>`
  ];
  return lines.join("\n");
}
|
|
5688
|
+
/**
 * Render the outgoing links of a page as GraphML <edge> elements.
 *
 * @param {object} page - Page with `slug` and `links` (target slugs).
 * @param {Set<string>} knownSlugs - Slugs that exist in the export; links to
 *   any other slug are dropped.
 * @returns {string[]} One edge element per retained link, in link order.
 */
function pageToEdges(page, knownSlugs) {
  const edges = [];
  for (const target of page.links) {
    if (!knownSlugs.has(target)) continue;
    edges.push(` <edge source="${escapeXml(page.slug)}" target="${escapeXml(target)}"/>`);
  }
  return edges;
}
|
|
5693
|
+
/**
 * Build a GraphML document with one node per page and one directed edge per
 * link whose target is also one of the exported pages.
 *
 * @param {Array<object>} pages - Pages to export.
 * @returns {string} The GraphML document, ending with a newline.
 */
function buildGraphml(pages) {
  const knownSlugs = new Set(pages.map((page) => page.slug));
  const nodeXml = pages.map(pageToNode).join("\n");
  const edgeXml = pages.flatMap((page) => pageToEdges(page, knownSlugs)).join("\n");
  const prolog = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<graphml xmlns="http://graphml.graphdrawing.org/graphml"',
    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
    ' xsi:schemaLocation="http://graphml.graphdrawing.org/graphml',
    ' http://graphml.graphdrawing.org/graphml/1.0/graphml.xsd">',
    ` ${KEY_DEFS}`,
    ' <graph id="wiki" edgedefault="directed">'
  ];
  // The trailing empty entry makes the joined document end with a newline.
  const epilog = [" </graph>", "</graphml>", ""];
  return [...prolog, nodeXml, edgeXml, ...epilog].join("\n");
}
|
|
5712
|
+
|
|
5713
|
+
// src/export/marp.ts
|
|
5714
|
+
// Maximum length (in UTF-16 code units) of the excerpt shown on a slide.
var SLIDE_BODY_MAX_CHARS = 300;
/**
 * Extract a short plain-text excerpt from a markdown body.
 *
 * Takes the first block (text up to the first blank line), removes leading
 * heading markers (`#`..`######`) and list bullets (`-`, `*`, `+`) from each
 * line, and truncates to SLIDE_BODY_MAX_CHARS with a trailing ellipsis.
 *
 * @param {string} body - Markdown body of a page.
 * @returns {string} The excerpt; never ends in half of a surrogate pair.
 */
function extractFirstParagraph(body) {
  const firstBlock = body.trim().split(/\n\s*\n/)[0] ?? "";
  const stripped = firstBlock.replace(/^#{1,6}\s+/gm, "").replace(/^[-*+]\s+/gm, "").trim();
  if (stripped.length <= SLIDE_BODY_MAX_CHARS) return stripped;
  let cut = SLIDE_BODY_MAX_CHARS;
  // If the cut falls between the two halves of a surrogate pair (e.g. an
  // emoji), drop the dangling high surrogate instead of emitting invalid text.
  const lastUnit = stripped.charCodeAt(cut - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  return `${stripped.slice(0, cut)}\u2026`;
}
|
|
5722
|
+
/**
 * Build the HTML comment attached to a slide with the page's timestamps and,
 * when present, its sources.
 *
 * @param {object} page - Page with createdAt, updatedAt and sources.
 * @returns {string} `<!-- created: … | updated: … [| sources: …] -->`.
 */
function buildSpeakerNotes(page) {
  const sourceNote = page.sources.length > 0 ? [`sources: ${page.sources.join(", ")}`] : [];
  const fields = [`created: ${page.createdAt}`, `updated: ${page.updatedAt}`, ...sourceNote];
  return `<!-- ${fields.join(" | ")} -->`;
}
|
|
5727
|
+
/**
 * Render one page as the markdown of a single slide: title heading, quoted
 * summary (with an optional italic tag line), body excerpt and notes comment.
 *
 * @param {object} page - Page with title, summary, tags and body, plus the
 *   fields read by buildSpeakerNotes.
 * @returns {string} Slide markdown without the leading slide separator.
 */
function pageToSlide(page) {
  const tagSuffix = page.tags.length > 0 ? `\n_Tags: ${page.tags.join(", ")}_` : "";
  const quote = `> ${page.summary}${tagSuffix}`;
  const excerpt = extractFirstParagraph(page.body);
  const notes = buildSpeakerNotes(page);
  return `## ${page.title}\n\n${quote}\n\n${excerpt}\n\n${notes}`;
}
|
|
5742
|
+
/**
 * Select the pages that belong to the requested page directory.
 *
 * @param {Array<object>} pages - Candidate pages.
 * @param {string} source2 - "all" to keep every page, otherwise the value a
 *   page's `pageDirectory` must equal.
 * @returns {Array<object>} The same array for "all", otherwise a filtered copy.
 */
function filterBySource(pages, source2) {
  return source2 === "all" ? pages : pages.filter((page) => page.pageDirectory === source2);
}
|
|
5746
|
+
/**
 * Build a Marp slide deck: YAML front matter, a title slide, then one slide
 * per selected page.
 *
 * @param {Array<object>} pages - All exportable pages.
 * @param {string} projectTitle - Deck title, used in the front matter and on
 *   the title slide.
 * @param {string} [source2="all"] - Page directory to include, or "all".
 * @returns {string} The deck markdown; parts are separated by blank lines.
 */
function buildMarp(pages, projectTitle, source2 = "all") {
  const filtered = filterBySource(pages, source2);
  const frontmatter = [
    "---",
    "marp: true",
    "theme: default",
    "paginate: true",
    // JSON string syntax is a valid YAML double-quoted scalar, so quotes,
    // backslashes and control characters in the title cannot break the
    // front matter. Ordinary titles render exactly as before.
    `title: ${JSON.stringify(String(projectTitle))}`,
    "---"
  ].join("\n");
  const titleSlide = [
    "",
    `# ${projectTitle}`,
    "",
    `${filtered.length} pages | ${(/* @__PURE__ */ new Date()).toISOString()}`
  ].join("\n");
  // Each page slide starts with the "---" slide separator.
  const slides = filtered.map((p) => `---\n\n${pageToSlide(p)}`);
  return [frontmatter, titleSlide, ...slides, ""].join("\n\n");
}
|
|
5767
|
+
|
|
5768
|
+
// src/export/types.ts
|
|
5769
|
+
// Accepted values for the marp `--source` option.
var MARP_SOURCES = [
  "concepts",
  "queries",
  "all"
];
// Every export format, in the order they are produced when no single target
// is requested.
var EXPORT_TARGETS = ["llms-txt", "llms-full-txt", "json", "json-ld", "graphml", "marp"];
|
|
5778
|
+
|
|
5779
|
+
// src/commands/export.ts
|
|
5780
|
+
// CommonJS-style loader bound to this module's URL.
var require2 = createRequire(import.meta.url);
// Directory, relative to the project root, that receives exported files.
var EXPORT_DIR = "dist/exports";
// Output file name for each export target.
var TARGET_FILENAMES = {
  "llms-txt": "llms.txt",
  "llms-full-txt": "llms-full.txt",
  "json": "wiki.json",
  "json-ld": "wiki.jsonld",
  "graphml": "wiki.graphml",
  "marp": "wiki.md"
};
|
|
5790
|
+
/**
 * Determine the project title from the `name` field of the project's
 * package.json.
 *
 * @param {string} root - Project root directory; may be relative to the
 *   current working directory.
 * @returns {string} The package name, or "Knowledge Wiki" when package.json
 *   cannot be loaded or has no non-empty string `name`.
 */
function resolveProjectTitle(root) {
  const fallback = "Knowledge Wiki";
  try {
    // Resolve to an absolute path: a relative result such as "package.json"
    // would be treated by require() as a package name, not a file.
    const pkg = require2(path36.resolve(root, "package.json"));
    return typeof pkg.name === "string" && pkg.name.trim().length > 0 ? pkg.name : fallback;
  } catch {
    // A missing or unreadable package.json is expected; use the default title.
    return fallback;
  }
}
|
|
5798
|
+
/**
 * Check whether a value names one of the supported export targets.
 *
 * @param {*} value - Candidate target name.
 * @returns {boolean} True when the value is listed in EXPORT_TARGETS.
 */
function isValidTarget(value) {
  return EXPORT_TARGETS.indexOf(value) !== -1;
}
|
|
5801
|
+
/**
 * Check whether a value is an accepted marp `--source` selection.
 *
 * @param {*} value - Candidate source name.
 * @returns {boolean} True when the value is listed in MARP_SOURCES.
 */
function isValidMarpSource(value) {
  return MARP_SOURCES.indexOf(value) !== -1;
}
|
|
5804
|
+
/**
 * Normalize the raw `--source` option for the marp export.
 *
 * @param {string|undefined} rawSource - Value given on the command line.
 * @returns {string} "all" when no value was given, otherwise the value itself.
 * @throws {Error} When the value is not one of MARP_SOURCES.
 */
function resolveMarpSource(rawSource) {
  if (!rawSource) return "all";
  if (isValidMarpSource(rawSource)) return rawSource;
  throw new Error(
    `Unknown --source value "${rawSource}". Valid values: ${MARP_SOURCES.join(", ")}`
  );
}
|
|
5813
|
+
/**
 * Produce the file content for a single export target.
 *
 * @param {string} target - One of the EXPORT_TARGETS names.
 * @param {Array<object>} pages - Pages to export.
 * @param {string} projectTitle - Title passed to the text and marp builders.
 * @param {string} marpSource - Page selection passed to the marp builder.
 * @returns {string|undefined} The content, or undefined for an unknown target.
 */
function buildContent(target, pages, projectTitle, marpSource) {
  // Lazily evaluated so only the requested builder runs.
  const builders = /* @__PURE__ */ new Map([
    ["llms-txt", () => buildLlmsTxt(pages, projectTitle)],
    ["llms-full-txt", () => buildLlmsFullTxt(pages, projectTitle)],
    ["json", () => buildJsonExport(pages)],
    ["json-ld", () => buildJsonLd(pages)],
    ["graphml", () => buildGraphml(pages)],
    ["marp", () => buildMarp(pages, projectTitle, marpSource)]
  ]);
  const build = builders.get(target);
  return build ? build() : void 0;
}
|
|
5829
|
+
/**
 * Compute the page count shown in the export summary.
 *
 * Only when marp is the sole target and a specific source was selected does
 * the count reflect the filtered pages; otherwise it is the total.
 *
 * @param {Array<object>} pages - All collected pages.
 * @param {string[]} targets - Targets produced in this run.
 * @param {string} marpSource - Resolved marp source selection.
 * @returns {number} Number of pages to report.
 */
function computeReportedPageCount(pages, targets, marpSource) {
  const isFilteredMarpRun = targets.length === 1 && targets[0] === "marp" && marpSource !== "all";
  if (!isFilteredMarpRun) return pages.length;
  return pages.reduce(
    (count, page) => page.pageDirectory === marpSource ? count + 1 : count,
    0
  );
}
|
|
5836
|
+
/**
 * Run the export: collect pages, build each requested target and write it
 * under EXPORT_DIR inside the project root.
 *
 * @param {string} root - Project root directory.
 * @param {{target?: string, source?: string}} [options] - Optional single
 *   target and marp source selection.
 * @returns {Promise<{written: string[], pageCount: number}>} Paths written, in
 *   target order, and the page count to report.
 * @throws {Error} When the target or source option is not recognized.
 */
async function runExport(root, options = {}) {
  const pages = await collectExportPages(root);
  const projectTitle = resolveProjectTitle(root);
  const targets = resolveTargets(options.target);
  const marpSource = resolveMarpSource(options.source);
  const written = [];
  // Targets are written one after another so status lines appear in order.
  for (const target of targets) {
    const outPath = path36.join(root, EXPORT_DIR, TARGET_FILENAMES[target]);
    await atomicWrite(outPath, buildContent(target, pages, projectTitle, marpSource));
    written.push(outPath);
    status("+", success(`Exported ${target} \u2192 ${source(outPath)}`));
  }
  const pageCount = computeReportedPageCount(pages, targets, marpSource);
  return { written, pageCount };
}
|
|
5851
|
+
/**
 * Turn the raw `--target` option into the list of targets to export.
 *
 * @param {string|undefined} rawTarget - Value given on the command line.
 * @returns {string[]} A copy of EXPORT_TARGETS when no value was given,
 *   otherwise a one-element list with the requested target.
 * @throws {Error} When the value is not a known target.
 */
function resolveTargets(rawTarget) {
  if (!rawTarget) return [...EXPORT_TARGETS];
  if (isValidTarget(rawTarget)) return [rawTarget];
  throw new Error(
    `Unknown export target "${rawTarget}". Valid targets: ${EXPORT_TARGETS.join(", ")}`
  );
}
|
|
5860
|
+
/**
 * CLI entry point for `export`: prints a header, runs the export and prints
 * a summary line with the page and file counts.
 *
 * @param {string} root - Project root directory.
 * @param {object} options - Parsed command-line options, passed to runExport.
 * @returns {Promise<void>}
 */
async function exportCommand(root, options) {
  header("Exporting wiki");
  const result = await runExport(root, options);
  const summary = `Done \u2014 ${result.pageCount} pages exported to ${result.written.length} file(s).`;
  status("\u2713", success(summary));
}
|
|
5868
|
+
|
|
3773
5869
|
// src/commands/schema.ts
|
|
3774
5870
|
import { existsSync as existsSync10 } from "fs";
|
|
3775
|
-
import { mkdir as
|
|
3776
|
-
import
|
|
5871
|
+
import { mkdir as mkdir7, writeFile as writeFile5 } from "fs/promises";
|
|
5872
|
+
import path37 from "path";
|
|
3777
5873
|
async function schemaInitCommand() {
|
|
3778
5874
|
const root = process.cwd();
|
|
3779
5875
|
const defaults = buildDefaultSchema();
|
|
@@ -3782,7 +5878,7 @@ async function schemaInitCommand() {
|
|
|
3782
5878
|
status("!", warn(`Schema file already exists at ${targetPath}`));
|
|
3783
5879
|
return;
|
|
3784
5880
|
}
|
|
3785
|
-
await
|
|
5881
|
+
await mkdir7(path37.dirname(targetPath), { recursive: true });
|
|
3786
5882
|
const serializable = {
|
|
3787
5883
|
version: defaults.version,
|
|
3788
5884
|
defaultKind: defaults.defaultKind,
|
|
@@ -3838,10 +5934,17 @@ async function reviewShowCommand(id) {
|
|
|
3838
5934
|
status("!", warn(`[${v.severity}] ${v.message}`));
|
|
3839
5935
|
}
|
|
3840
5936
|
}
|
|
5937
|
+
if (candidate.provenanceViolations && candidate.provenanceViolations.length > 0) {
|
|
5938
|
+
console.log();
|
|
5939
|
+
header("Provenance violations");
|
|
5940
|
+
for (const v of candidate.provenanceViolations) {
|
|
5941
|
+
status("!", warn(`[${v.severity}] ${v.message}`));
|
|
5942
|
+
}
|
|
5943
|
+
}
|
|
3841
5944
|
}
|
|
3842
5945
|
|
|
3843
5946
|
// src/commands/review-approve.ts
|
|
3844
|
-
import
|
|
5947
|
+
import path38 from "path";
|
|
3845
5948
|
|
|
3846
5949
|
// src/commands/review-helpers.ts
|
|
3847
5950
|
async function runReviewUnderLock(id, underLock) {
|
|
@@ -3873,7 +5976,7 @@ async function approveUnderLock(root, id) {
|
|
|
3873
5976
|
process.exitCode = 1;
|
|
3874
5977
|
return;
|
|
3875
5978
|
}
|
|
3876
|
-
const pagePath =
|
|
5979
|
+
const pagePath = path38.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
|
|
3877
5980
|
await atomicWrite(pagePath, candidate.body);
|
|
3878
5981
|
status("+", success(`Approved \u2192 ${source(pagePath)}`));
|
|
3879
5982
|
await persistCandidateSourceStates(root, candidate);
|
|
@@ -3933,7 +6036,7 @@ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js
|
|
|
3933
6036
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3934
6037
|
|
|
3935
6038
|
// src/mcp/tools.ts
|
|
3936
|
-
import
|
|
6039
|
+
import path39 from "path";
|
|
3937
6040
|
import { z } from "zod";
|
|
3938
6041
|
|
|
3939
6042
|
// src/mcp/provider-check.ts
|
|
@@ -3941,7 +6044,8 @@ var PROVIDER_KEY_VARS = {
|
|
|
3941
6044
|
anthropic: "ANTHROPIC_API_KEY",
|
|
3942
6045
|
openai: "OPENAI_API_KEY",
|
|
3943
6046
|
ollama: null,
|
|
3944
|
-
minimax: "MINIMAX_API_KEY"
|
|
6047
|
+
minimax: "MINIMAX_API_KEY",
|
|
6048
|
+
copilot: "GITHUB_TOKEN"
|
|
3945
6049
|
};
|
|
3946
6050
|
function ensureProviderAvailable() {
|
|
3947
6051
|
const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
|
|
@@ -4069,7 +6173,7 @@ async function pickSearchSlugs(root, question) {
|
|
|
4069
6173
|
if (candidates.length > 0) return candidates.map((c) => c.slug);
|
|
4070
6174
|
} catch {
|
|
4071
6175
|
}
|
|
4072
|
-
const indexContent = await safeReadFile(
|
|
6176
|
+
const indexContent = await safeReadFile(path39.join(root, INDEX_FILE));
|
|
4073
6177
|
const { pages } = await selectPages(question, indexContent);
|
|
4074
6178
|
return pages;
|
|
4075
6179
|
}
|
|
@@ -4128,8 +6232,8 @@ function registerStatusTool(server, root) {
|
|
|
4128
6232
|
);
|
|
4129
6233
|
}
|
|
4130
6234
|
async function collectStatus(root) {
|
|
4131
|
-
const concepts = await collectPageSummaries(
|
|
4132
|
-
const queries = await collectPageSummaries(
|
|
6235
|
+
const concepts = await collectPageSummaries(path39.join(root, CONCEPTS_DIR));
|
|
6236
|
+
const queries = await collectPageSummaries(path39.join(root, QUERIES_DIR));
|
|
4133
6237
|
const state = await readState(root);
|
|
4134
6238
|
const changes = await detectChanges(root, state);
|
|
4135
6239
|
const orphans = await findOrphanedSlugs(root);
|
|
@@ -4146,7 +6250,7 @@ async function collectStatus(root) {
|
|
|
4146
6250
|
};
|
|
4147
6251
|
}
|
|
4148
6252
|
/**
 * List the slugs of concept pages whose metadata marks them as orphaned.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<string[]>} Slugs with a truthy `meta.orphaned`, in scan order.
 */
async function findOrphanedSlugs(root) {
  const conceptsPath = path39.join(root, CONCEPTS_DIR);
  const scanned = await scanWikiPages(conceptsPath);
  const orphaned = [];
  for (const { slug, meta } of scanned) {
    if (meta.orphaned) orphaned.push(slug);
  }
  return orphaned;
}
|
|
4152
6256
|
async function loadPageRecords(root, slugs) {
|
|
@@ -4159,7 +6263,7 @@ async function loadPageRecords(root, slugs) {
|
|
|
4159
6263
|
}
|
|
4160
6264
|
async function readPage(root, slug) {
|
|
4161
6265
|
for (const dir of PAGE_DIRS2) {
|
|
4162
|
-
const content = await safeReadFile(
|
|
6266
|
+
const content = await safeReadFile(path39.join(root, dir, `${slug}.md`));
|
|
4163
6267
|
if (!content) continue;
|
|
4164
6268
|
const { meta, body } = parseFrontmatter(content);
|
|
4165
6269
|
if (meta.orphaned) continue;
|
|
@@ -4174,8 +6278,8 @@ async function readPage(root, slug) {
|
|
|
4174
6278
|
}
|
|
4175
6279
|
|
|
4176
6280
|
// src/mcp/resources.ts
|
|
4177
|
-
import
|
|
4178
|
-
import { readdir as
|
|
6281
|
+
import path40 from "path";
|
|
6282
|
+
import { readdir as readdir12 } from "fs/promises";
|
|
4179
6283
|
import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
4180
6284
|
function jsonContent(uri, payload) {
|
|
4181
6285
|
return {
|
|
@@ -4208,7 +6312,7 @@ function registerIndexResource(server, root) {
|
|
|
4208
6312
|
mimeType: "text/markdown"
|
|
4209
6313
|
},
|
|
4210
6314
|
async (uri) => {
|
|
4211
|
-
const content = await safeReadFile(
|
|
6315
|
+
const content = await safeReadFile(path40.join(root, INDEX_FILE));
|
|
4212
6316
|
return { contents: [markdownContent(uri, content)] };
|
|
4213
6317
|
}
|
|
4214
6318
|
);
|
|
@@ -4275,23 +6379,23 @@ function registerQueryResource(server, root) {
|
|
|
4275
6379
|
);
|
|
4276
6380
|
}
|
|
4277
6381
|
/**
 * List the markdown files in the project's sources directory together with
 * their frontmatter metadata.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One record per `.md` file: `filename` plus the
 *   parsed frontmatter fields (a frontmatter `filename` key takes
 *   precedence). Empty when the directory cannot be read.
 */
async function listSources(root) {
  const sourcesPath = path40.join(root, SOURCES_DIR);
  let entries;
  try {
    entries = await readdir12(sourcesPath);
  } catch {
    // An unreadable sources directory is reported as "no sources".
    return [];
  }
  const markdownFiles = entries.filter((name) => name.endsWith(".md"));
  const records = [];
  // Files are read one at a time, in directory-listing order.
  for (const filename of markdownFiles) {
    const raw = await safeReadFile(path40.join(sourcesPath, filename));
    const parsed = parseFrontmatter(raw);
    records.push({ filename, ...parsed.meta });
  }
  return records;
}
|
|
4293
6397
|
async function loadPageWithMeta(root, dir, slug) {
|
|
4294
|
-
const filePath =
|
|
6398
|
+
const filePath = path40.join(root, dir, `${slug}.md`);
|
|
4295
6399
|
const content = await safeReadFile(filePath);
|
|
4296
6400
|
if (!content) {
|
|
4297
6401
|
throw new Error(`Page not found: ${dir}/${slug}.md`);
|
|
@@ -4300,10 +6404,10 @@ async function loadPageWithMeta(root, dir, slug) {
|
|
|
4300
6404
|
return { slug, meta, body: body.trim() };
|
|
4301
6405
|
}
|
|
4302
6406
|
async function listPagesUnder(root, dir, scheme) {
|
|
4303
|
-
const pagesPath =
|
|
6407
|
+
const pagesPath = path40.join(root, dir);
|
|
4304
6408
|
let files;
|
|
4305
6409
|
try {
|
|
4306
|
-
files = await
|
|
6410
|
+
files = await readdir12(pagesPath);
|
|
4307
6411
|
} catch {
|
|
4308
6412
|
return { resources: [] };
|
|
4309
6413
|
}
|
|
@@ -4327,8 +6431,8 @@ async function startMCPServer(options) {
|
|
|
4327
6431
|
}
|
|
4328
6432
|
|
|
4329
6433
|
// src/cli.ts
|
|
4330
|
-
var
|
|
4331
|
-
var { version } =
|
|
6434
|
+
var require3 = createRequire2(import.meta.url);
|
|
6435
|
+
var { version } = require3("../package.json");
|
|
4332
6436
|
var program = new Command();
|
|
4333
6437
|
program.name("llmwiki").description("The knowledge compiler \u2014 raw sources in, interlinked wiki out").version(version);
|
|
4334
6438
|
program.command("ingest <source>").description("Ingest a URL or local file into sources/").action(async (source2) => {
|
|
@@ -4339,11 +6443,31 @@ program.command("ingest <source>").description("Ingest a URL or local file into
|
|
|
4339
6443
|
process.exit(1);
|
|
4340
6444
|
}
|
|
4341
6445
|
});
|
|
6446
|
+
// `ingest-session <path>`: ingest a coding-agent session export; on failure
// print the error in red and exit with status 1.
program
  .command("ingest-session <path>")
  .description("Ingest a coding-agent session export (Claude, Codex, Cursor) into sources/")
  .action(async (sessionPath) => {
    try {
      await ingestSession(sessionPath);
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
6454
|
+
// `view`: start the local read-only web viewer; on failure print the error
// in red and exit with status 1.
program
  .command("view")
  .description("Start a local read-only web viewer for the current wiki project")
  .option("--port <port>", "Port to bind (default 0 \u2014 OS-assigned)")
  .option("--host <host>", "Host to bind (requires --allow-lan; default 127.0.0.1)")
  .option("--allow-lan", "Bind beyond loopback (requires --host); off by default for privacy")
  .option("--open", "Open the viewer in the default browser after startup")
  .action(async (viewOptions) => {
    try {
      await viewCommand(viewOptions);
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
4342
6462
|
program.command("compile").description("Compile sources/ into an interlinked wiki").option(
|
|
4343
6463
|
"--review",
|
|
4344
6464
|
"Write generated pages as review candidates under .llmwiki/candidates/ instead of mutating wiki/. Orphan-marking for deleted sources is deferred until the next non-review compile."
|
|
6465
|
+
).option(
|
|
6466
|
+
"--lang <code>",
|
|
6467
|
+
'Target language for generated wiki content (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
|
|
4345
6468
|
).action(async (options) => {
|
|
4346
6469
|
try {
|
|
6470
|
+
applyLanguageOption(options.lang);
|
|
4347
6471
|
requireProvider();
|
|
4348
6472
|
await compileCommand({ review: options.review });
|
|
4349
6473
|
} catch (err) {
|
|
@@ -4384,15 +6508,21 @@ reviewCommand.command("reject <id>").description("Reject a candidate and archive
|
|
|
4384
6508
|
process.exit(1);
|
|
4385
6509
|
}
|
|
4386
6510
|
});
|
|
4387
|
-
program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").
|
|
4388
|
-
|
|
4389
|
-
|
|
4390
|
-
|
|
4391
|
-
|
|
4392
|
-
|
|
4393
|
-
|
|
6511
|
+
// `query <question>`: apply the optional output language, verify a provider
// is configured, then answer the question against the wiki in the current
// directory. On failure print the error in red and exit with status 1.
program
  .command("query <question>")
  .description("Ask a question against the wiki")
  .option("--save", "Save the answer as a wiki page")
  .option("--debug", "Print which pages and chunks were selected and their scores")
  .option(
    "--lang <code>",
    'Target language for the answer (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
  )
  .action(async (question, queryOptions) => {
    try {
      applyLanguageOption(queryOptions.lang);
      requireProvider();
      await queryCommand(process.cwd(), question, queryOptions);
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
4396
6526
|
program.command("watch").description("Watch sources/ and auto-recompile on changes").action(async () => {
|
|
4397
6527
|
try {
|
|
4398
6528
|
requireProvider();
|
|
@@ -4427,6 +6557,17 @@ schemaCmd.command("show").description("Print the resolved schema for this projec
|
|
|
4427
6557
|
process.exit(1);
|
|
4428
6558
|
}
|
|
4429
6559
|
});
|
|
6560
|
+
// `export`: write the wiki to the portable export formats for the project in
// the current directory; on failure print the error in red and exit with
// status 1.
program
  .command("export")
  .description("Export wiki content to portable formats (llms.txt, JSON, GraphML, Marp, \u2026)")
  .option("--target <name>", "Limit export to a single target format")
  .option(
    "--source <kind>",
    "For marp target: which pages to include \u2014 concepts, queries, or all (default: all)"
  )
  .action(async (exportOptions) => {
    try {
      await exportCommand(process.cwd(), exportOptions);
    } catch (failure) {
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
4430
6571
|
program.command("serve").description("Start an MCP server exposing wiki tools and resources over stdio").option("--root <dir>", "Project root directory", process.cwd()).action(async (options) => {
|
|
4431
6572
|
try {
|
|
4432
6573
|
await startMCPServer({ root: options.root, version });
|
|
@@ -4435,11 +6576,17 @@ program.command("serve").description("Start an MCP server exposing wiki tools an
|
|
|
4435
6576
|
process.exit(1);
|
|
4436
6577
|
}
|
|
4437
6578
|
});
|
|
6579
|
+
/**
 * Copy a `--lang` option into the LLMWIKI_OUTPUT_LANG environment variable.
 *
 * @param {string|undefined} lang - Raw option value; ignored when missing or
 *   blank, otherwise stored with surrounding whitespace removed.
 * @returns {void}
 */
function applyLanguageOption(lang) {
  if (!lang) return;
  const trimmed = lang.trim();
  if (trimmed.length === 0) return;
  process.env.LLMWIKI_OUTPUT_LANG = trimmed;
}
|
|
4438
6584
|
var PROVIDER_KEY_VARS2 = {
|
|
4439
6585
|
anthropic: "ANTHROPIC_API_KEY",
|
|
4440
6586
|
openai: "OPENAI_API_KEY",
|
|
4441
6587
|
ollama: null,
|
|
4442
|
-
minimax: "MINIMAX_API_KEY"
|
|
6588
|
+
minimax: "MINIMAX_API_KEY",
|
|
6589
|
+
copilot: "GITHUB_TOKEN"
|
|
4443
6590
|
};
|
|
4444
6591
|
function requireProvider() {
|
|
4445
6592
|
const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
|