llm-wiki-compiler 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +163 -9
- package/dist/cli.js +1478 -417
- package/dist/cli.js.map +1 -1
- package/package.json +5 -2
package/dist/cli.js
CHANGED
|
@@ -13,6 +13,14 @@ import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
|
|
|
13
13
|
import { writeFile, rename, readFile, mkdir } from "fs/promises";
|
|
14
14
|
import path from "path";
|
|
15
15
|
import yaml from "js-yaml";
|
|
16
|
+
var SPAN_SUFFIX_PATTERN = /^(?<file>[^:#]+)(?:(?::(?<colonStart>\d+)(?:-(?<colonEnd>\d+))?)|(?:#L(?<hashStart>\d+)(?:-L(?<hashEnd>\d+))?))?$/;
|
|
17
|
+
var MIN_LINE_NUMBER = 1;
|
|
18
|
+
var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
|
|
19
|
+
"extracted",
|
|
20
|
+
"merged",
|
|
21
|
+
"inferred",
|
|
22
|
+
"ambiguous"
|
|
23
|
+
]);
|
|
16
24
|
/**
 * Turn a page title into a lowercase, hyphen-separated slug.
 *
 * Apostrophes are dropped, any character that is not a word character,
 * whitespace or hyphen is removed, whitespace runs become a single hyphen,
 * repeated hyphens collapse, and one leading/trailing hyphen is stripped.
 *
 * @param {string} title - Human-readable title.
 * @returns {string} The slug (may be empty if nothing survives filtering).
 */
function slugify(title) {
  const lowered = title.toLowerCase();
  const withoutApostrophes = lowered.replace(/['']/g, "");
  const allowedCharsOnly = withoutApostrophes.replace(/[^\w\s-]/g, "");
  const hyphenated = allowedCharsOnly.replace(/\s+/g, "-");
  const collapsed = hyphenated.replace(/-+/g, "-");
  return collapsed.replace(/^-|-$/g, "");
}
|
|
@@ -43,6 +51,23 @@ async function atomicWrite(filePath, content) {
|
|
|
43
51
|
await writeFile(tmpPath, content, "utf-8");
|
|
44
52
|
await rename(tmpPath, filePath);
|
|
45
53
|
}
|
|
54
|
+
/**
 * Check that a line span starts at or after MIN_LINE_NUMBER and does not
 * end before it starts. NaN on either side yields false.
 *
 * @param {number} start - First line of the span.
 * @param {number} end - Last line of the span.
 * @returns {boolean} True when the span is well-formed.
 */
function isValidLineRange(start, end) {
  if (!(start >= MIN_LINE_NUMBER)) return false;
  return end >= start;
}
|
|
57
|
+
/**
 * Decide whether a single citation entry is malformed.
 *
 * Blank entries are malformed. Entries containing neither ":" nor "#" are
 * plain filenames and are accepted. Anything else must match
 * SPAN_SUFFIX_PATTERN and, when it carries a line span, that span must pass
 * isValidLineRange (a lone start line is treated as a one-line span).
 *
 * @param {string} entry - One entry from a citation marker.
 * @returns {boolean} True when the entry should be reported as malformed.
 */
function isMalformedCitationEntry(entry) {
  const candidate = entry.trim();
  if (candidate === "") return true;

  const hasSpanDelimiter = candidate.includes(":") || candidate.includes("#");
  if (!hasSpanDelimiter) return false;

  const groups = SPAN_SUFFIX_PATTERN.exec(candidate)?.groups;
  if (!groups) return true;

  const rawStart = groups.colonStart ?? groups.hashStart;
  if (rawStart === undefined) return false;

  const rawEnd = groups.colonEnd ?? groups.hashEnd;
  const firstLine = Number(rawStart);
  const lastLine = rawEnd === undefined ? firstLine : Number(rawEnd);
  return !isValidLineRange(firstLine, lastLine);
}
|
|
46
71
|
async function safeReadFile(filePath) {
|
|
47
72
|
try {
|
|
48
73
|
return await readFile(filePath, "utf-8");
|
|
@@ -50,6 +75,46 @@ async function safeReadFile(filePath) {
|
|
|
50
75
|
return "";
|
|
51
76
|
}
|
|
52
77
|
}
|
|
78
|
+
/**
 * Normalise a raw confidence value onto the closed interval [0, 1].
 *
 * @param {unknown} raw - Value read from metadata.
 * @returns {number|undefined} The clamped number, or undefined when `raw`
 *   is not a finite number.
 */
function parseConfidence(raw) {
  const isUsableNumber = typeof raw === "number" && Number.isFinite(raw);
  if (!isUsableNumber) return undefined;
  return raw < 0 ? 0 : raw > 1 ? 1 : raw;
}
|
|
84
|
+
/**
 * Accept a provenance state only if it is one of VALID_PROVENANCE_STATES.
 *
 * @param {unknown} raw - Value read from metadata.
 * @returns {string|undefined} The state unchanged, or undefined when it is
 *   not a recognised string.
 */
function parseProvenanceState(raw) {
  const isRecognised = typeof raw === "string" && VALID_PROVENANCE_STATES.has(raw);
  return isRecognised ? raw : undefined;
}
|
|
88
|
+
/**
 * Coerce one `contradictedBy` entry into a `{ slug, reason? }` reference.
 *
 * A non-blank string becomes `{ slug }`. An object with a non-blank string
 * `slug` becomes a reference with the trimmed slug, plus `reason` when that
 * is a string. Everything else is rejected.
 *
 * @param {unknown} entry - Raw entry.
 * @returns {{slug: string, reason?: string}|null} The reference, or null
 *   when the entry is unusable.
 */
function coerceContradictionEntry(entry) {
  if (typeof entry === "string") {
    const bareSlug = entry.trim();
    return bareSlug.length > 0 ? { slug: bareSlug } : null;
  }

  const carriesSlug = Boolean(entry) && typeof entry === "object" && "slug" in entry;
  if (!carriesSlug) return null;

  const { slug, reason } = entry;
  if (typeof slug !== "string") return null;

  const trimmedSlug = slug.trim();
  if (trimmedSlug.length === 0) return null;

  return typeof reason === "string"
    ? { slug: trimmedSlug, reason }
    : { slug: trimmedSlug };
}
|
|
101
|
+
/**
 * Parse a `contradictedBy` list, keeping only entries that
 * coerceContradictionEntry accepts.
 *
 * @param {unknown} raw - Value read from metadata.
 * @returns {Array<{slug: string, reason?: string}>|undefined} The usable
 *   references, or undefined when `raw` is not an array or nothing survives.
 */
function parseContradictedBy(raw) {
  if (!Array.isArray(raw)) return undefined;

  const accepted = [];
  for (const entry of raw) {
    const ref = coerceContradictionEntry(entry);
    if (ref !== null) accepted.push(ref);
  }
  return accepted.length === 0 ? undefined : accepted;
}
|
|
106
|
+
/**
 * Accept an inferred-paragraph count only when it is a non-negative integer.
 *
 * @param {unknown} raw - Value read from metadata.
 * @returns {number|undefined} The count, or undefined when invalid.
 */
function parseInferredParagraphs(raw) {
  const isCount = typeof raw === "number" && Number.isInteger(raw) && raw >= 0;
  return isCount ? raw : undefined;
}
|
|
110
|
+
/**
 * Extract the four provenance fields from a metadata object, running each
 * through its dedicated parser so invalid values come back as undefined.
 *
 * @param {object} meta - Metadata object to read from.
 * @returns {{confidence: (number|undefined), provenanceState: (string|undefined),
 *   contradictedBy: (Array|undefined), inferredParagraphs: (number|undefined)}}
 */
function parseProvenanceMetadata(meta) {
  const confidence = parseConfidence(meta.confidence);
  const provenanceState = parseProvenanceState(meta.provenanceState);
  const contradictedBy = parseContradictedBy(meta.contradictedBy);
  const inferredParagraphs = parseInferredParagraphs(meta.inferredParagraphs);
  return { confidence, provenanceState, contradictedBy, inferredParagraphs };
}
|
|
53
118
|
function validateWikiPage(content) {
|
|
54
119
|
if (!content || content.trim().length === 0) return false;
|
|
55
120
|
const { meta, body } = parseFrontmatter(content);
|
|
@@ -74,6 +139,8 @@ var PROVIDER_MODELS = {
|
|
|
74
139
|
minimax: "MiniMax-M2.7"
|
|
75
140
|
};
|
|
76
141
|
var OLLAMA_DEFAULT_HOST = "http://localhost:11434/v1";
|
|
142
|
+
var OPENAI_DEFAULT_TIMEOUT_MS = 10 * 60 * 1e3;
|
|
143
|
+
var OLLAMA_DEFAULT_TIMEOUT_MS = 30 * 60 * 1e3;
|
|
77
144
|
var SOURCES_DIR = "sources";
|
|
78
145
|
var CONCEPTS_DIR = "wiki/concepts";
|
|
79
146
|
var QUERIES_DIR = "wiki/queries";
|
|
@@ -83,7 +150,11 @@ var LOCK_FILE = ".llmwiki/lock";
|
|
|
83
150
|
var INDEX_FILE = "wiki/index.md";
|
|
84
151
|
var MOC_FILE = "wiki/MOC.md";
|
|
85
152
|
var EMBEDDINGS_FILE = ".llmwiki/embeddings.json";
|
|
153
|
+
var CANDIDATES_DIR = ".llmwiki/candidates";
|
|
154
|
+
var CANDIDATES_ARCHIVE_DIR = ".llmwiki/candidates/archive";
|
|
86
155
|
var EMBEDDING_TOP_K = 15;
|
|
156
|
+
var LOW_CONFIDENCE_THRESHOLD = 0.5;
|
|
157
|
+
var MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS = 2;
|
|
87
158
|
var EMBEDDING_MODELS = {
|
|
88
159
|
anthropic: "voyage-3-lite",
|
|
89
160
|
openai: "text-embedding-3-small",
|
|
@@ -274,11 +345,11 @@ async function ingest(source2) {
|
|
|
274
345
|
}
|
|
275
346
|
|
|
276
347
|
// src/commands/compile.ts
|
|
277
|
-
import { existsSync as
|
|
348
|
+
import { existsSync as existsSync7 } from "fs";
|
|
278
349
|
|
|
279
350
|
// src/compiler/index.ts
|
|
280
|
-
import { readFile as
|
|
281
|
-
import
|
|
351
|
+
import { readFile as readFile10 } from "fs/promises";
|
|
352
|
+
import path18 from "path";
|
|
282
353
|
|
|
283
354
|
// src/utils/state.ts
|
|
284
355
|
import { readFile as readFile3, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
|
|
@@ -321,6 +392,78 @@ async function removeSourceState(root, sourceFile) {
|
|
|
321
392
|
await writeState(root, state);
|
|
322
393
|
}
|
|
323
394
|
|
|
395
|
+
// src/compiler/source-state.ts
|
|
396
|
+
import path6 from "path";
|
|
397
|
+
|
|
398
|
+
// src/compiler/hasher.ts
|
|
399
|
+
import { createHash } from "crypto";
|
|
400
|
+
import { readFile as readFile4, readdir } from "fs/promises";
|
|
401
|
+
import path5 from "path";
|
|
402
|
+
/**
 * Compute the SHA-256 hex digest of a file's contents, read as UTF-8 text.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Lowercase hex digest.
 */
async function hashFile(filePath) {
  const text = await readFile4(filePath, "utf-8");
  const hasher = createHash("sha256");
  hasher.update(text);
  return hasher.digest("hex");
}
|
|
406
|
+
/**
 * Compare the source files currently on disk with the previous state.
 *
 * Each present file is classified one at a time via classifyFile; files
 * recorded in `prevState` but no longer present are appended as deletions.
 *
 * @param {string} root - Project root directory.
 * @param {{sources: object}} prevState - Previously recorded state.
 * @returns {Promise<Array<{file: string, status: string}>>} One entry per file.
 */
async function detectChanges(root, prevState) {
  const currentFiles = await listSourceFiles(path5.join(root, SOURCES_DIR));

  const present = [];
  for (const file of currentFiles) {
    present.push({ file, status: await classifyFile(root, file, prevState) });
  }

  return present.concat(findDeletedFiles(currentFiles, prevState));
}
|
|
418
|
+
/**
 * List the `.md` entries directly inside a directory.
 *
 * @param {string} sourcesPath - Directory to read.
 * @returns {Promise<string[]>} Matching entry names; an empty array when the
 *   directory cannot be read.
 */
async function listSourceFiles(sourcesPath) {
  let entries;
  try {
    entries = await readdir(sourcesPath);
  } catch {
    // Best-effort: an unreadable or missing directory means "no sources".
    return [];
  }
  return entries.filter((name) => name.endsWith(".md"));
}
|
|
426
|
+
/**
 * Classify one source file against the previous state by content hash.
 *
 * @param {string} root - Project root directory.
 * @param {string} file - File name inside the sources directory.
 * @param {{sources: object}} prevState - Previously recorded state.
 * @returns {Promise<"new"|"changed"|"unchanged">} The file's status.
 */
async function classifyFile(root, file, prevState) {
  const currentHash = await hashFile(path5.join(root, SOURCES_DIR, file));
  const previous = prevState.sources[file];
  if (!previous) return "new";
  return previous.hash === currentHash ? "unchanged" : "changed";
}
|
|
434
|
+
/**
 * Find files recorded in the previous state that are no longer present.
 *
 * @param {string[]} currentFiles - File names currently on disk.
 * @param {{sources: object}} prevState - Previously recorded state.
 * @returns {Array<{file: string, status: "deleted"}>} One entry per missing file.
 */
function findDeletedFiles(currentFiles, prevState) {
  const present = new Set(currentFiles);
  const deleted = [];
  for (const file of Object.keys(prevState.sources)) {
    if (!present.has(file)) deleted.push({ file, status: "deleted" });
  }
  return deleted;
}
|
|
438
|
+
|
|
439
|
+
// src/compiler/source-state.ts
|
|
440
|
+
/**
 * Build a per-source state snapshot for extraction results that produced at
 * least one concept. All entries share a single `compiledAt` timestamp.
 *
 * @param {string} root - Project root directory.
 * @param {Array<{sourceFile: string, concepts: Array}>} extractions - Extraction results.
 * @returns {Promise<object>} Map of source file name to its state entry.
 */
async function buildExtractionSourceStates(root, extractions) {
  const compiledAt = new Date().toISOString();
  const snapshot = {};
  for (const extraction of extractions) {
    if (extraction.concepts.length > 0) {
      snapshot[extraction.sourceFile] = await buildEntry(root, extraction, compiledAt);
    }
  }
  return snapshot;
}
|
|
449
|
+
/**
 * Build the state entry for one extraction result: the source file's hash,
 * the slugs of its concepts, and the supplied timestamp.
 *
 * @param {string} root - Project root directory.
 * @param {{sourceFile: string, concepts: Array<{concept: string}>}} result - Extraction result.
 * @param {string} compiledAt - Timestamp to record on the entry.
 * @returns {Promise<{hash: string, concepts: string[], compiledAt: string}>}
 */
async function buildEntry(root, result, compiledAt) {
  const sourcePath = path6.join(root, SOURCES_DIR, result.sourceFile);
  const hash = await hashFile(sourcePath);

  const concepts = [];
  for (const { concept } of result.concepts) {
    concepts.push(slugify(concept));
  }
  return { hash, concepts, compiledAt };
}
|
|
458
|
+
/**
 * Select the state entries belonging to the given source files.
 *
 * @param {object} allStates - Map of source file name to state entry.
 * @param {Iterable<string>} sourceFiles - Files to pick.
 * @returns {object} New map holding only the requested files that have a
 *   truthy entry in `allStates`.
 */
function pickStatesForSources(allStates, sourceFiles) {
  const picked = {};
  for (const file of sourceFiles) {
    const entry = allStates[file];
    if (!entry) continue;
    picked[file] = entry;
  }
  return picked;
}
|
|
466
|
+
|
|
324
467
|
// src/providers/anthropic.ts
|
|
325
468
|
import Anthropic from "@anthropic-ai/sdk";
|
|
326
469
|
var VOYAGE_EMBEDDINGS_URL = "https://api.voyageai.com/v1/embeddings";
|
|
@@ -434,6 +577,15 @@ var AnthropicProvider = class {
|
|
|
434
577
|
|
|
435
578
|
// src/providers/openai.ts
|
|
436
579
|
import OpenAI from "openai";
|
|
580
|
+
/**
 * Read a timeout (in milliseconds) from an environment variable.
 *
 * The value is trimmed and parsed with `Number`. Unset, blank, non-numeric,
 * non-finite, zero or negative values yield undefined so callers can fall
 * back to a default.
 *
 * Values above the largest delay Node timers accept are clamped to that
 * maximum: Node coerces a larger delay to 1 ms, which would make an
 * oversized setting expire almost immediately instead of "never".
 *
 * @param {string} name - Environment variable name.
 * @returns {number|undefined} Timeout in milliseconds, or undefined.
 */
function readTimeoutEnv(name) {
  // Largest delay setTimeout honours (2^31 - 1 ms, about 24.8 days).
  const MAX_TIMER_DELAY_MS = 2147483647;
  const raw = process.env[name]?.trim();
  if (!raw) return undefined;
  const parsed = Number(raw);
  if (!Number.isFinite(parsed) || parsed <= 0) return undefined;
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
}
|
|
586
|
+
/**
 * Resolve the request timeout override from LLMWIKI_REQUEST_TIMEOUT_MS.
 *
 * @returns {number|undefined} Timeout in milliseconds, or undefined when the
 *   variable is unset or invalid.
 */
function resolveOpenAITimeoutMs() {
  const fromEnv = readTimeoutEnv("LLMWIKI_REQUEST_TIMEOUT_MS");
  return fromEnv;
}
|
|
437
589
|
function translateToolToOpenAI(tool) {
|
|
438
590
|
return {
|
|
439
591
|
type: "function",
|
|
@@ -446,14 +598,20 @@ function translateToolToOpenAI(tool) {
|
|
|
446
598
|
}
|
|
447
599
|
var OpenAIProvider = class {
|
|
448
600
|
client;
|
|
601
|
+
embeddingsClient;
|
|
449
602
|
model;
|
|
450
|
-
|
|
603
|
+
configuredEmbeddingModel;
|
|
604
|
+
constructor(model, options = {}) {
|
|
451
605
|
this.model = model;
|
|
452
|
-
|
|
606
|
+
this.configuredEmbeddingModel = options.embeddingModel;
|
|
607
|
+
const resolvedKey = options.apiKey ?? process.env.OPENAI_API_KEY ?? "";
|
|
608
|
+
const timeout = options.timeoutMs ?? resolveOpenAITimeoutMs() ?? OPENAI_DEFAULT_TIMEOUT_MS;
|
|
453
609
|
this.client = new OpenAI({
|
|
454
610
|
apiKey: resolvedKey,
|
|
455
|
-
|
|
611
|
+
baseURL: options.baseURL ?? null,
|
|
612
|
+
timeout
|
|
456
613
|
});
|
|
614
|
+
this.embeddingsClient = options.embeddingsBaseURL ? new OpenAI({ apiKey: resolvedKey, baseURL: options.embeddingsBaseURL, timeout }) : this.client;
|
|
457
615
|
}
|
|
458
616
|
/** Send a single non-streaming completion request. */
|
|
459
617
|
async complete(system, messages, maxTokens) {
|
|
@@ -502,7 +660,7 @@ var OpenAIProvider = class {
|
|
|
502
660
|
* Subclasses (e.g. Ollama) override embeddingModel() to pick a different model.
|
|
503
661
|
*/
|
|
504
662
|
async embed(text) {
|
|
505
|
-
const response = await this.
|
|
663
|
+
const response = await this.embeddingsClient.embeddings.create({
|
|
506
664
|
model: this.embeddingModel(),
|
|
507
665
|
input: text
|
|
508
666
|
});
|
|
@@ -514,18 +672,27 @@ var OpenAIProvider = class {
|
|
|
514
672
|
}
|
|
515
673
|
/** Default embedding model for this provider. Subclasses may override. */
|
|
516
674
|
embeddingModel() {
|
|
517
|
-
return EMBEDDING_MODELS.openai;
|
|
675
|
+
return this.configuredEmbeddingModel ?? EMBEDDING_MODELS.openai;
|
|
518
676
|
}
|
|
519
677
|
};
|
|
520
678
|
|
|
521
679
|
// src/providers/ollama.ts
|
|
680
|
+
/**
 * Resolve the Ollama request timeout.
 *
 * Precedence: the explicit argument, then OLLAMA_TIMEOUT_MS, then
 * LLMWIKI_REQUEST_TIMEOUT_MS, then OLLAMA_DEFAULT_TIMEOUT_MS.
 *
 * @param {number} [explicit] - Caller-supplied timeout in milliseconds.
 * @returns {number} Timeout in milliseconds.
 */
function resolveOllamaTimeoutMs(explicit) {
  if (explicit !== undefined && explicit !== null) return explicit;

  const envNames = ["OLLAMA_TIMEOUT_MS", "LLMWIKI_REQUEST_TIMEOUT_MS"];
  for (const envName of envNames) {
    const fromEnv = readTimeoutEnv(envName);
    if (fromEnv !== undefined) return fromEnv;
  }
  return OLLAMA_DEFAULT_TIMEOUT_MS;
}
|
|
522
683
|
/**
 * Provider for an Ollama server, built on OpenAIProvider with a fixed
 * placeholder API key ("ollama").
 */
var OllamaProvider = class extends OpenAIProvider {
  /**
   * @param {string} model - Model name.
   * @param {object} [options] - Optional settings.
   * @param {string} [options.baseURL] - Server base URL; falls back to
   *   OLLAMA_DEFAULT_HOST so a missing value never reaches the parent, which
   *   would otherwise pass a null base URL to the OpenAI client.
   * @param {string} [options.embeddingsBaseURL] - Separate base URL for embeddings.
   * @param {string} [options.embeddingModel] - Embedding model override.
   * @param {number} [options.timeoutMs] - Explicit request timeout in milliseconds.
   */
  constructor(model, options = {}) {
    super(model, {
      baseURL: options.baseURL ?? OLLAMA_DEFAULT_HOST,
      apiKey: "ollama",
      embeddingsBaseURL: options.embeddingsBaseURL,
      embeddingModel: options.embeddingModel,
      timeoutMs: resolveOllamaTimeoutMs(options.timeoutMs)
    });
  }
  /** Ollama ships a dedicated embedding model (nomic-embed-text). */
  embeddingModel() {
    return this.configuredEmbeddingModel ?? EMBEDDING_MODELS.ollama;
  }
};
|
|
531
698
|
|
|
@@ -533,14 +700,14 @@ var OllamaProvider = class extends OpenAIProvider {
|
|
|
533
700
|
var MINIMAX_BASE_URL = "https://api.minimax.io/v1";
|
|
534
701
|
/**
 * Provider for MiniMax, built on OpenAIProvider and pinned to
 * MINIMAX_BASE_URL.
 */
var MiniMaxProvider = class extends OpenAIProvider {
  /**
   * @param {string} model - Model name.
   * @param {string} apiKey - API key forwarded to the parent provider.
   */
  constructor(model, apiKey) {
    const parentOptions = { baseURL: MINIMAX_BASE_URL, apiKey };
    super(model, parentOptions);
  }
};
|
|
539
706
|
|
|
540
707
|
// src/utils/claude-settings.ts
|
|
541
708
|
import { readFileSync } from "fs";
|
|
542
709
|
import { homedir } from "os";
|
|
543
|
-
import
|
|
710
|
+
import path7 from "path";
|
|
544
711
|
var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
|
|
545
712
|
function isRecord(value) {
|
|
546
713
|
return typeof value === "object" && value !== null;
|
|
@@ -551,7 +718,7 @@ function normalize(value) {
|
|
|
551
718
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
552
719
|
}
|
|
553
720
|
/**
 * Resolve the Claude settings file path.
 *
 * @param {object} env - Environment-like map of variables.
 * @returns {string} The value of the LLMWIKI_CLAUDE_SETTINGS_PATH entry when
 *   present, otherwise `<home>/.claude/settings.json`.
 */
function resolveClaudeSettingsPath(env) {
  const override = env[CLAUDE_SETTINGS_PATH_ENV];
  if (override !== undefined && override !== null) return override;
  return path7.join(homedir(), ".claude", "settings.json");
}
|
|
556
723
|
function readClaudeSettingsFile(settingsPath) {
|
|
557
724
|
try {
|
|
@@ -640,18 +807,27 @@ function getProvider() {
|
|
|
640
807
|
case "anthropic":
|
|
641
808
|
return getAnthropicProvider();
|
|
642
809
|
case "openai":
|
|
643
|
-
return new OpenAIProvider(getModelForProvider("openai")
|
|
810
|
+
return new OpenAIProvider(getModelForProvider("openai"), {
|
|
811
|
+
baseURL: readOptionalEnv("OPENAI_BASE_URL"),
|
|
812
|
+
embeddingsBaseURL: readOptionalEnv("OPENAI_EMBEDDINGS_BASE_URL"),
|
|
813
|
+
embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
|
|
814
|
+
});
|
|
644
815
|
case "ollama":
|
|
645
|
-
return new OllamaProvider(
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
816
|
+
return new OllamaProvider(getModelForProvider("ollama"), {
|
|
817
|
+
baseURL: readOptionalEnv("OLLAMA_HOST") ?? OLLAMA_DEFAULT_HOST,
|
|
818
|
+
embeddingsBaseURL: readOptionalEnv("OLLAMA_EMBEDDINGS_HOST"),
|
|
819
|
+
embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
|
|
820
|
+
});
|
|
649
821
|
case "minimax":
|
|
650
822
|
return getMiniMaxProvider();
|
|
651
823
|
default:
|
|
652
824
|
throw new Error(`Unhandled provider: ${providerName}`);
|
|
653
825
|
}
|
|
654
826
|
}
|
|
827
|
+
/**
 * Read an environment variable, treating unset and blank values alike.
 *
 * @param {string} name - Environment variable name.
 * @returns {string|undefined} The trimmed value, or undefined when the
 *   variable is unset or contains only whitespace.
 */
function readOptionalEnv(name) {
  const trimmed = process.env[name]?.trim();
  if (trimmed === undefined || trimmed === "") return undefined;
  return trimmed;
}
|
|
655
831
|
/**
 * Pick the model name for a provider.
 *
 * LLMWIKI_MODEL wins when it holds a non-blank value; it is trimmed so
 * stray whitespace never ends up in the model identifier. A blank or unset
 * variable falls back to the provider's entry in PROVIDER_MODELS.
 *
 * @param {string} providerName - Key into PROVIDER_MODELS.
 * @returns {string} Model name to use.
 */
function getModelForProvider(providerName) {
  const override = process.env.LLMWIKI_MODEL?.trim();
  return override ? override : PROVIDER_MODELS[providerName];
}
|
|
@@ -715,8 +891,8 @@ async function callClaude(options) {
|
|
|
715
891
|
}
|
|
716
892
|
|
|
717
893
|
// src/utils/lock.ts
|
|
718
|
-
import { open, readFile as
|
|
719
|
-
import
|
|
894
|
+
import { open, readFile as readFile5, unlink, mkdir as mkdir4 } from "fs/promises";
|
|
895
|
+
import path8 from "path";
|
|
720
896
|
var RECLAIM_SUFFIX = ".reclaim";
|
|
721
897
|
var MAX_ACQUIRE_ATTEMPTS = 2;
|
|
722
898
|
function isProcessAlive(pid) {
|
|
@@ -728,8 +904,8 @@ function isProcessAlive(pid) {
|
|
|
728
904
|
}
|
|
729
905
|
}
|
|
730
906
|
async function acquireLock(root) {
|
|
731
|
-
const lockPath =
|
|
732
|
-
await mkdir4(
|
|
907
|
+
const lockPath = path8.join(root, LOCK_FILE);
|
|
908
|
+
await mkdir4(path8.join(root, LLMWIKI_DIR), { recursive: true });
|
|
733
909
|
for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
|
|
734
910
|
const created = await tryCreateLock(lockPath);
|
|
735
911
|
if (created) return true;
|
|
@@ -792,7 +968,7 @@ async function tryCreateLock(lockPath) {
|
|
|
792
968
|
}
|
|
793
969
|
async function isLockStale(lockPath) {
|
|
794
970
|
try {
|
|
795
|
-
const content = await
|
|
971
|
+
const content = await readFile5(lockPath, "utf-8");
|
|
796
972
|
const pid = parseInt(content.trim(), 10);
|
|
797
973
|
if (isNaN(pid)) return true;
|
|
798
974
|
return !isProcessAlive(pid);
|
|
@@ -801,7 +977,7 @@ async function isLockStale(lockPath) {
|
|
|
801
977
|
}
|
|
802
978
|
}
|
|
803
979
|
async function releaseLock(root) {
|
|
804
|
-
const lockPath =
|
|
980
|
+
const lockPath = path8.join(root, LOCK_FILE);
|
|
805
981
|
try {
|
|
806
982
|
await unlink(lockPath);
|
|
807
983
|
} catch {
|
|
@@ -809,6 +985,12 @@ async function releaseLock(root) {
|
|
|
809
985
|
}
|
|
810
986
|
|
|
811
987
|
// src/compiler/prompts.ts
|
|
988
|
+
var PROVENANCE_STATE_VALUES = [
|
|
989
|
+
"extracted",
|
|
990
|
+
"merged",
|
|
991
|
+
"inferred",
|
|
992
|
+
"ambiguous"
|
|
993
|
+
];
|
|
812
994
|
var CONCEPT_EXTRACTION_TOOL = {
|
|
813
995
|
name: "extract_concepts",
|
|
814
996
|
description: "Extract knowledge concepts from a source document",
|
|
@@ -836,6 +1018,31 @@ var CONCEPT_EXTRACTION_TOOL = {
|
|
|
836
1018
|
type: "array",
|
|
837
1019
|
items: { type: "string" },
|
|
838
1020
|
description: "2-4 categorical tags for organizing this concept (e.g., 'machine-learning', 'optimization')"
|
|
1021
|
+
},
|
|
1022
|
+
confidence: {
|
|
1023
|
+
type: "number",
|
|
1024
|
+
description: "Confidence in this concept on a 0..1 scale (1 = directly stated, 0 = highly speculative)."
|
|
1025
|
+
},
|
|
1026
|
+
provenance_state: {
|
|
1027
|
+
type: "string",
|
|
1028
|
+
enum: PROVENANCE_STATE_VALUES,
|
|
1029
|
+
description: "How this concept was produced: 'extracted' (direct from source), 'merged' (synthesised across sources), 'inferred' (model deduction), or 'ambiguous' (sources disagree)."
|
|
1030
|
+
},
|
|
1031
|
+
contradicted_by: {
|
|
1032
|
+
type: "array",
|
|
1033
|
+
items: {
|
|
1034
|
+
type: "object",
|
|
1035
|
+
properties: {
|
|
1036
|
+
slug: { type: "string", description: "Slug of the contradicting concept." },
|
|
1037
|
+
reason: { type: "string", description: "Brief reason for the contradiction." }
|
|
1038
|
+
},
|
|
1039
|
+
required: ["slug"]
|
|
1040
|
+
},
|
|
1041
|
+
description: "Slugs of other concepts whose evidence contradicts this one."
|
|
1042
|
+
},
|
|
1043
|
+
inferred_paragraphs: {
|
|
1044
|
+
type: "integer",
|
|
1045
|
+
description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
|
|
839
1046
|
}
|
|
840
1047
|
},
|
|
841
1048
|
required: ["concept", "summary", "is_new"]
|
|
@@ -857,6 +1064,17 @@ ${existingIndex}` : "\n\nNo existing wiki pages yet.";
|
|
|
857
1064
|
"Each concept should be a standalone topic that someone might look up.",
|
|
858
1065
|
"Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
|
|
859
1066
|
"Use the extract_concepts tool to return your findings.",
|
|
1067
|
+
"",
|
|
1068
|
+
"For every concept, emit provenance metadata so downstream tools can reason",
|
|
1069
|
+
"about reliability:",
|
|
1070
|
+
" - confidence: 0..1 \u2014 how certain you are the source supports this concept.",
|
|
1071
|
+
" - provenance_state: 'extracted' if directly stated, 'merged' if synthesised",
|
|
1072
|
+
" from multiple parts of the source, 'inferred' if reasoned from context,",
|
|
1073
|
+
" or 'ambiguous' if the source is contradictory or unclear.",
|
|
1074
|
+
" - contradicted_by: slugs of other concepts (in this batch or the index)",
|
|
1075
|
+
" whose evidence conflicts with this one.",
|
|
1076
|
+
" - inferred_paragraphs: estimated number of paragraphs in the resulting",
|
|
1077
|
+
" page that will be inferred rather than directly citable.",
|
|
860
1078
|
indexSection,
|
|
861
1079
|
"\n\n--- SOURCE DOCUMENT ---\n\n",
|
|
862
1080
|
sourceContent
|
|
@@ -883,70 +1101,219 @@ ${relatedPages}` : "";
|
|
|
883
1101
|
"Source attribution: at the end of each prose paragraph, append a citation",
|
|
884
1102
|
"marker showing which source file(s) the paragraph drew from.",
|
|
885
1103
|
"Format: ^[filename.md] for single-source, ^[source-a.md, source-b.md] for multi-source.",
|
|
886
|
-
"
|
|
1104
|
+
"When a single sentence makes a specific factual claim and you can identify the",
|
|
1105
|
+
"exact line range it came from, you may use the claim-level form",
|
|
1106
|
+
"^[filename.md:START-END] (or ^[filename.md#LSTART-LEND]) at the end of that",
|
|
1107
|
+
"sentence \u2014 START and END are 1-indexed line numbers in the source file.",
|
|
1108
|
+
"Paragraph-level citations remain the default; only switch to claim-level form",
|
|
1109
|
+
"when it materially improves verifiability and the line range is unambiguous.",
|
|
1110
|
+
"Place citations only at the end of prose paragraphs or sentences \u2014 not on",
|
|
1111
|
+
"headings, list items, or code blocks.",
|
|
887
1112
|
"Source filenames are visible as `--- SOURCE: filename.md ---` headers in the content below.",
|
|
1113
|
+
"",
|
|
1114
|
+
"If a paragraph is your inference rather than a direct extraction, leave it",
|
|
1115
|
+
"uncited \u2014 downstream lint rules will count uncited paragraphs as 'inferred'",
|
|
1116
|
+
"to compute the page's provenance metadata.",
|
|
888
1117
|
existingSection,
|
|
889
1118
|
relatedSection,
|
|
890
1119
|
"\n\n--- SOURCE MATERIAL ---\n\n",
|
|
891
1120
|
sourceContent
|
|
892
1121
|
].join("\n");
|
|
893
1122
|
}
|
|
1123
|
+
/**
 * Check that a raw concept from tool output has the required shape:
 * string `concept` and `summary`, boolean `is_new`, and `tags` either
 * absent or an array.
 *
 * Non-object entries (including null) are rejected rather than
 * dereferenced, so one malformed item cannot throw and cause the caller's
 * catch-all to discard the whole batch.
 *
 * @param {unknown} c - Candidate concept.
 * @returns {boolean} True when the candidate is usable.
 */
function isValidRawConcept(c) {
  if (c === null || typeof c !== "object") return false;
  return (
    typeof c.concept === "string" &&
    typeof c.summary === "string" &&
    typeof c.is_new === "boolean" &&
    (c.tags === undefined || Array.isArray(c.tags))
  );
}
|
|
1126
|
+
/**
 * Coerce the model's `contradicted_by` output into `{ slug, reason? }`
 * references.
 *
 * Object entries need a non-blank string `slug`; `reason` is kept when it
 * is a string. Bare non-blank strings are also accepted as slugs, because
 * the extraction prompt describes this field as a list of slugs and a model
 * may answer that way. Unusable entries are skipped.
 *
 * @param {unknown} raw - Raw `contradicted_by` value from tool output.
 * @returns {Array<{slug: string, reason?: string}>|undefined} The usable
 *   references, or undefined when `raw` is not an array or nothing survives.
 */
function coerceContradictedBy(raw) {
  if (!Array.isArray(raw)) return undefined;
  const refs = [];
  for (const entry of raw) {
    if (typeof entry === "string") {
      const bareSlug = entry.trim();
      if (bareSlug.length > 0) refs.push({ slug: bareSlug });
      continue;
    }
    if (!entry || typeof entry !== "object") continue;
    if (typeof entry.slug !== "string") continue;
    const slug = entry.slug.trim();
    if (slug.length === 0) continue;
    const ref = { slug };
    if (typeof entry.reason === "string") ref.reason = entry.reason;
    refs.push(ref);
  }
  return refs.length > 0 ? refs : undefined;
}
|
|
1139
|
+
/**
 * Map a validated raw concept (snake_case tool output) onto the internal
 * camelCase concept shape, dropping provenance fields that are invalid.
 *
 * `confidence` goes through parseConfidence so out-of-range numbers are
 * clamped to [0, 1] instead of being passed through as-is.
 *
 * @param {object} c - Raw concept that already passed isValidRawConcept.
 * @returns {object} Concept with `concept`, `summary`, `is_new`, `tags`,
 *   `confidence`, `provenanceState`, `contradictedBy` and
 *   `inferredParagraphs`; optional fields are undefined when absent or invalid.
 */
function mapRawConcept(c) {
  const hasKnownState =
    typeof c.provenance_state === "string" &&
    PROVENANCE_STATE_VALUES.includes(c.provenance_state);
  const rawCount = c.inferred_paragraphs;
  const hasValidCount =
    typeof rawCount === "number" && Number.isInteger(rawCount) && rawCount >= 0;
  return {
    concept: c.concept,
    summary: c.summary,
    is_new: c.is_new,
    tags: Array.isArray(c.tags) ? c.tags : undefined,
    confidence: parseConfidence(c.confidence),
    provenanceState: hasKnownState ? c.provenance_state : undefined,
    contradictedBy: coerceContradictedBy(c.contradicted_by),
    inferredParagraphs: hasValidCount ? rawCount : undefined
  };
}
|
|
1152
|
+
/**
 * Build the authoring prompt for a seed page.
 *
 * @param {{kind: string, title: string, summary: string}} seed - Seed page definition.
 * @param {{minWikilinks: number, description: string}} rule - Rule for the seed's page kind.
 * @param {string} relatedPagesContent - Text of the related pages to draw facts from.
 * @returns {string} Prompt text, one instruction per line.
 */
function buildSeedPagePrompt(seed, rule, relatedPagesContent) {
  const { minWikilinks } = rule;

  let linkExpectation = "Use [[wikilinks]] when referencing other pages.";
  if (minWikilinks > 0) {
    linkExpectation = `Include at least ${minWikilinks} [[wikilinks]] to related pages.`;
  }

  const promptLines = [
    `You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
    `Page-kind guidance: ${rule.description}`,
    `Summary line for context: ${seed.summary}`,
    "Draw facts only from the related wiki pages provided below.",
    linkExpectation,
    "Write in a neutral, informative tone. Be concise but thorough.",
    "\n\n--- RELATED PAGES ---\n\n",
    relatedPagesContent
  ];
  return promptLines.join("\n");
}
|
|
894
1166
|
/**
 * Parse the JSON output of the concept-extraction tool into concepts.
 *
 * Entries failing isValidRawConcept are dropped; the rest are mapped with
 * mapRawConcept. Any failure (invalid JSON, unexpected shape) yields an
 * empty list rather than an exception.
 *
 * @param {string} toolOutput - JSON text returned by the tool call.
 * @returns {Array<object>} Parsed concepts, possibly empty.
 */
function parseConcepts(toolOutput) {
  try {
    const payload = JSON.parse(toolOutput);
    const candidates = payload.concepts ?? [];
    const accepted = [];
    for (const candidate of candidates) {
      if (isValidRawConcept(candidate)) accepted.push(mapRawConcept(candidate));
    }
    return accepted;
  } catch {
    // Best-effort parsing: malformed output is treated as "no concepts".
    return [];
  }
}
|
|
910
1175
|
|
|
911
|
-
// src/
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
1176
|
+
// src/schema/types.ts
|
|
1177
|
+
var PAGE_KINDS = [
|
|
1178
|
+
"concept",
|
|
1179
|
+
"entity",
|
|
1180
|
+
"comparison",
|
|
1181
|
+
"overview"
|
|
1182
|
+
];
|
|
1183
|
+
|
|
1184
|
+
// src/schema/defaults.ts
|
|
1185
|
+
var DEFAULT_MIN_LINKS = {
|
|
1186
|
+
concept: 0,
|
|
1187
|
+
entity: 1,
|
|
1188
|
+
comparison: 2,
|
|
1189
|
+
overview: 3
|
|
1190
|
+
};
|
|
1191
|
+
var DEFAULT_DESCRIPTIONS = {
|
|
1192
|
+
concept: "A standalone idea, technique, or pattern worth documenting.",
|
|
1193
|
+
entity: "A specific thing \u2014 a person, product, organization, or named artifact.",
|
|
1194
|
+
comparison: "A side-by-side analysis weighing two or more concepts or entities.",
|
|
1195
|
+
overview: "A top-down map page that situates several concepts within a domain."
|
|
1196
|
+
};
|
|
1197
|
+
/**
 * Build the default rule for every page kind by pairing each kind's entry
 * in DEFAULT_MIN_LINKS with its entry in DEFAULT_DESCRIPTIONS.
 *
 * @returns {object} Rules keyed by `concept`, `entity`, `comparison`, `overview`.
 */
function buildDefaultKindRules() {
  const ruleFor = (kind) => ({
    minWikilinks: DEFAULT_MIN_LINKS[kind],
    description: DEFAULT_DESCRIPTIONS[kind]
  });
  return {
    concept: ruleFor("concept"),
    entity: ruleFor("entity"),
    comparison: ruleFor("comparison"),
    overview: ruleFor("overview")
  };
}
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
1211
|
+
/**
 * Build the schema used when no schema file is present: version 1,
 * "concept" as the default kind, default kind rules, no seed pages, and
 * `loadedFrom` set to null.
 *
 * @returns {object} A fresh default schema object.
 */
function buildDefaultSchema() {
  const kinds = buildDefaultKindRules();
  return {
    version: 1,
    defaultKind: "concept",
    kinds,
    seedPages: [],
    loadedFrom: null
  };
}
|
|
1220
|
+
|
|
1221
|
+
// src/schema/loader.ts
|
|
1222
|
+
import { existsSync as existsSync2 } from "fs";
|
|
1223
|
+
import { readFile as readFile6 } from "fs/promises";
|
|
1224
|
+
import path9 from "path";
|
|
1225
|
+
import yaml2 from "js-yaml";
|
|
1226
|
+
var SCHEMA_CANDIDATE_PATHS = [
|
|
1227
|
+
".llmwiki/schema.json",
|
|
1228
|
+
".llmwiki/schema.yaml",
|
|
1229
|
+
".llmwiki/schema.yml",
|
|
1230
|
+
"wiki/.schema.yaml",
|
|
1231
|
+
"wiki/.schema.yml"
|
|
1232
|
+
];
|
|
1233
|
+
/**
 * Locate the first existing schema file under `root`, checking
 * SCHEMA_CANDIDATE_PATHS in order.
 *
 * @param {string} root - Project root directory.
 * @returns {string|null} Path of the first candidate that exists, or null.
 */
function findSchemaPath(root) {
  const firstExisting = SCHEMA_CANDIDATE_PATHS
    .map((candidate) => path9.join(root, candidate))
    .find((absolute) => existsSync2(absolute));
  return firstExisting ?? null;
}
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
1240
|
+
/**
 * Parse schema file content, choosing the parser by file extension:
 * JSON for `.json`, YAML otherwise.
 *
 * @param {string} filePath - Path of the schema file (only its suffix is inspected).
 * @param {string} content - Raw file content.
 * @returns {object} The parsed value when it is an object, otherwise `{}`.
 * @throws Whatever the selected parser throws on invalid input.
 */
function parseSchemaFile(filePath, content) {
  let parsed;
  if (filePath.endsWith(".json")) {
    parsed = JSON.parse(content);
  } else {
    parsed = yaml2.load(content);
  }
  const isObjectLike = Boolean(parsed) && typeof parsed === "object";
  return isObjectLike ? parsed : {};
}
|
|
1246
|
+
/**
 * Check whether a value is one of the known page kinds in PAGE_KINDS.
 *
 * @param {unknown} value - Candidate kind.
 * @returns {boolean} True when `value` is a string listed in PAGE_KINDS.
 */
function isPageKind(value) {
  if (typeof value !== "string") return false;
  return PAGE_KINDS.includes(value);
}
|
|
1249
|
+
/**
 * Merge a user-supplied page-kind rule over its defaults.
 *
 * `minWikilinks` is a link count, so an override is only accepted when it
 * is a non-negative integer. NaN, infinities, negative and fractional
 * numbers (all expressible in YAML) fall back to the default instead of
 * producing a rule that can never, or always, be satisfied.
 *
 * @param {{minWikilinks: number, description: string}} defaults - Default rule.
 * @param {object} [override] - User override; falsy means "use defaults".
 * @returns {{minWikilinks: number, description: string}} The merged rule
 *   (`defaults` itself when there is no override).
 */
function mergeKindRule(defaults, override) {
  if (!override) return defaults;
  const { minWikilinks, description } = override;
  const hasValidMinimum = Number.isInteger(minWikilinks) && minWikilinks >= 0;
  return {
    minWikilinks: hasValidMinimum ? minWikilinks : defaults.minWikilinks,
    description: typeof description === "string" ? description : defaults.description
  };
}
|
|
1255
|
+
/**
 * Merge per-kind rule overrides over the default rules for every kind in
 * PAGE_KINDS.
 *
 * @param {object} defaults - Default rules keyed by page kind.
 * @param {object} [overrides] - Overrides keyed by page kind; falsy means none.
 * @returns {object} A new rules object; `defaults` is not mutated.
 */
function mergeKinds(defaults, overrides) {
  if (!overrides) return { ...defaults };

  const merged = { ...defaults };
  PAGE_KINDS.forEach((kind) => {
    merged[kind] = mergeKindRule(defaults[kind], overrides[kind]);
  });
  return merged;
}
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
const
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
return "unchanged";
|
|
1263
|
+
/**
 * Validate and normalise one seed-page entry from a schema file.
 *
 * An entry needs a non-blank string `title` and a known page `kind`.
 * `summary` defaults to an empty string, and `relatedSlugs` keeps only
 * string items (undefined when it is not an array).
 *
 * Non-object entries — including the null produced by an empty YAML list
 * item — are rejected rather than dereferenced, so a single bad item is
 * skipped instead of aborting schema loading with a TypeError.
 *
 * @param {unknown} entry - Raw seed-page entry.
 * @returns {{title: string, kind: string, summary: string, relatedSlugs: (string[]|undefined)}|null}
 *   The normalised seed page, or null when the entry is invalid.
 */
function normalizeSeedPage(entry) {
  if (entry === null || typeof entry !== "object") return null;
  if (typeof entry.title !== "string" || entry.title.trim() === "") return null;
  if (!isPageKind(entry.kind)) return null;
  const summary = typeof entry.summary === "string" ? entry.summary : "";
  const relatedSlugs = Array.isArray(entry.relatedSlugs)
    ? entry.relatedSlugs.filter((slug) => typeof slug === "string")
    : undefined;
  return { title: entry.title, kind: entry.kind, summary, relatedSlugs };
}
|
|
947
|
-
function
|
|
948
|
-
|
|
949
|
-
return
|
|
1270
|
+
/**
 * Normalise a list of seed-page entries, dropping those that
 * normalizeSeedPage rejects.
 *
 * @param {unknown} entries - Raw `seedPages` value from the schema file.
 * @returns {Array<object>} Valid seed pages; empty when `entries` is not an array.
 */
function normalizeSeedPages(entries) {
  if (!Array.isArray(entries)) return [];

  const pages = [];
  entries.forEach((entry) => {
    const page = normalizeSeedPage(entry);
    if (page !== null) pages.push(page);
  });
  return pages;
}
|
|
1274
|
+
/**
 * Combine the default schema with overrides parsed from a schema file.
 *
 * @param {object} defaults - Default schema.
 * @param {object} overrides - Parsed schema file content.
 * @param {string|null} loadedFrom - Path the overrides were read from.
 * @returns {object} Schema with version 1, the override's `defaultKind` when
 *   it is a known page kind, merged kind rules, normalised seed pages, and
 *   `loadedFrom`.
 */
function applyOverrides(defaults, overrides, loadedFrom) {
  let defaultKind = defaults.defaultKind;
  if (isPageKind(overrides.defaultKind)) {
    defaultKind = overrides.defaultKind;
  }
  const kinds = mergeKinds(defaults.kinds, overrides.kinds);
  const seedPages = normalizeSeedPages(overrides.seedPages);
  return { version: 1, defaultKind, kinds, seedPages, loadedFrom };
}
|
|
1284
|
+
/**
 * Load the wiki schema for a project.
 *
 * Returns the default schema when no schema file exists; otherwise reads the
 * first candidate found, parses it, and applies it over the defaults.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object>} The effective schema.
 */
async function loadSchema(root) {
  const schemaPath = findSchemaPath(root);
  if (!schemaPath) return buildDefaultSchema();

  const fileText = await readFile6(schemaPath, "utf-8");
  const overrides = parseSchemaFile(schemaPath, fileText);
  return applyOverrides(buildDefaultSchema(), overrides, schemaPath);
}
|
|
1292
|
+
/**
 * Path where a new schema file is created: the first entry of
 * SCHEMA_CANDIDATE_PATHS, resolved under `root`.
 *
 * @param {string} root - Project root directory.
 * @returns {string} Path for the initial schema file.
 */
function defaultSchemaInitPath(root) {
  const [preferredRelativePath] = SCHEMA_CANDIDATE_PATHS;
  return path9.join(root, preferredRelativePath);
}
|
|
1295
|
+
|
|
1296
|
+
// src/schema/helpers.ts
|
|
1297
|
+
import yaml3 from "js-yaml";
|
|
1298
|
+
var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
|
|
1299
|
+
/**
 * Resolve a page's kind, falling back to the schema default.
 *
 * @param {unknown} rawKind - Kind value read from a page.
 * @param {{defaultKind: string}} schema - Effective schema.
 * @returns {string} `rawKind` when it is a string listed in PAGE_KINDS,
 *   otherwise `schema.defaultKind`.
 */
function resolvePageKind(rawKind, schema) {
  const isKnownKind = typeof rawKind === "string" && PAGE_KINDS.includes(rawKind);
  return isKnownKind ? rawKind : schema.defaultKind;
}
|
|
1305
|
+
/**
 * Count the `[[wikilink]]` occurrences in a page body using WIKILINK_PATTERN.
 *
 * @param {string} body - Page body text.
 * @returns {number} Number of matches, 0 when there are none.
 */
function countWikilinks(body) {
  return body.match(WIKILINK_PATTERN)?.length ?? 0;
}
|
|
1309
|
+
/**
 * Serializes a schema to YAML.
 *
 * Only `version`, `defaultKind`, `kinds` and `seedPages` are written, in that
 * order; any other property on `schema` (such as `loadedFrom`) is left out.
 *
 * @param {object} schema - Schema object to serialize.
 * @returns {string} YAML text produced by js-yaml with unlimited line width
 *   and double quotes as the quoting style.
 */
function serializeSchemaToYaml(schema) {
  const { version, defaultKind, kinds, seedPages } = schema;
  const dumpOptions = { lineWidth: -1, quotingType: '"' };
  return yaml3.dump({ version, defaultKind, kinds, seedPages }, dumpOptions);
}
|
|
951
1318
|
|
|
952
1319
|
// src/compiler/deps.ts
|
|
@@ -1095,7 +1462,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
|
|
|
1095
1462
|
}
|
|
1096
1463
|
|
|
1097
1464
|
// src/compiler/orphan.ts
|
|
1098
|
-
import
|
|
1465
|
+
import path10 from "path";
|
|
1099
1466
|
async function markOrphaned(root, sourceFile, state) {
|
|
1100
1467
|
const sourceEntry = state.sources[sourceFile];
|
|
1101
1468
|
if (!sourceEntry) return;
|
|
@@ -1121,7 +1488,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
|
|
|
1121
1488
|
}
|
|
1122
1489
|
}
|
|
1123
1490
|
async function orphanPage(root, slug, reason) {
|
|
1124
|
-
const pagePath =
|
|
1491
|
+
const pagePath = path10.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
1125
1492
|
const content = await safeReadFile(pagePath);
|
|
1126
1493
|
if (!content) return;
|
|
1127
1494
|
const { meta } = parseFrontmatter(content);
|
|
@@ -1132,18 +1499,18 @@ async function orphanPage(root, slug, reason) {
|
|
|
1132
1499
|
}
|
|
1133
1500
|
|
|
1134
1501
|
// src/compiler/resolver.ts
|
|
1135
|
-
import { readdir as readdir2, readFile as
|
|
1136
|
-
import
|
|
1137
|
-
import { existsSync as
|
|
1502
|
+
import { readdir as readdir2, readFile as readFile7 } from "fs/promises";
|
|
1503
|
+
import path11 from "path";
|
|
1504
|
+
import { existsSync as existsSync3 } from "fs";
|
|
1138
1505
|
async function buildTitleIndex(root) {
|
|
1139
|
-
const conceptsDir =
|
|
1140
|
-
if (!
|
|
1506
|
+
const conceptsDir = path11.join(root, CONCEPTS_DIR);
|
|
1507
|
+
if (!existsSync3(conceptsDir)) return [];
|
|
1141
1508
|
const files = await readdir2(conceptsDir);
|
|
1142
1509
|
const pages = [];
|
|
1143
1510
|
for (const file of files) {
|
|
1144
1511
|
if (!file.endsWith(".md")) continue;
|
|
1145
|
-
const filePath =
|
|
1146
|
-
const content = await
|
|
1512
|
+
const filePath = path11.join(conceptsDir, file);
|
|
1513
|
+
const content = await readFile7(filePath, "utf-8");
|
|
1147
1514
|
const { meta } = parseFrontmatter(content);
|
|
1148
1515
|
if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
|
|
1149
1516
|
pages.push({
|
|
@@ -1197,7 +1564,7 @@ function addWikilinks(body, titles, selfTitle) {
|
|
|
1197
1564
|
const matches = findTitleMatches(result, page.title);
|
|
1198
1565
|
for (const m of matches.reverse()) {
|
|
1199
1566
|
if (!isLinkablePosition(result, m.start, m.end)) continue;
|
|
1200
|
-
result = result.slice(0, m.start) + `[[${page.title}]]` + result.slice(m.end);
|
|
1567
|
+
result = result.slice(0, m.start) + `[[${page.slug}|${page.title}]]` + result.slice(m.end);
|
|
1201
1568
|
}
|
|
1202
1569
|
}
|
|
1203
1570
|
return result;
|
|
@@ -1229,7 +1596,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1229
1596
|
let count = 0;
|
|
1230
1597
|
for (const page of titleIndex) {
|
|
1231
1598
|
if (newSlugs.includes(page.slug)) continue;
|
|
1232
|
-
const content = await
|
|
1599
|
+
const content = await readFile7(page.filePath, "utf-8");
|
|
1233
1600
|
const { body } = parseFrontmatter(content);
|
|
1234
1601
|
const linked = addWikilinks(body, newTitles, page.title);
|
|
1235
1602
|
if (linked !== body) {
|
|
@@ -1241,7 +1608,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
|
|
|
1241
1608
|
return count;
|
|
1242
1609
|
}
|
|
1243
1610
|
async function linkPage(page, titleIndex) {
|
|
1244
|
-
const content = await
|
|
1611
|
+
const content = await readFile7(page.filePath, "utf-8");
|
|
1245
1612
|
const { body } = parseFrontmatter(content);
|
|
1246
1613
|
const linked = addWikilinks(body, titleIndex, page.title);
|
|
1247
1614
|
if (linked === body) return false;
|
|
@@ -1252,17 +1619,17 @@ async function linkPage(page, titleIndex) {
|
|
|
1252
1619
|
|
|
1253
1620
|
// src/compiler/indexgen.ts
|
|
1254
1621
|
import { readdir as readdir3 } from "fs/promises";
|
|
1255
|
-
import
|
|
1622
|
+
import path12 from "path";
|
|
1256
1623
|
async function generateIndex(root) {
|
|
1257
1624
|
status("*", info("Generating index..."));
|
|
1258
|
-
const conceptsPath =
|
|
1259
|
-
const queriesPath =
|
|
1625
|
+
const conceptsPath = path12.join(root, CONCEPTS_DIR);
|
|
1626
|
+
const queriesPath = path12.join(root, QUERIES_DIR);
|
|
1260
1627
|
const concepts = await collectPageSummaries(conceptsPath);
|
|
1261
1628
|
const queries = await collectPageSummaries(queriesPath);
|
|
1262
1629
|
concepts.sort((a, b) => a.title.localeCompare(b.title));
|
|
1263
1630
|
queries.sort((a, b) => a.title.localeCompare(b.title));
|
|
1264
1631
|
const indexContent = buildIndexContent(concepts, queries);
|
|
1265
|
-
const indexPath =
|
|
1632
|
+
const indexPath = path12.join(root, INDEX_FILE);
|
|
1266
1633
|
await atomicWrite(indexPath, indexContent);
|
|
1267
1634
|
const total = concepts.length + queries.length;
|
|
1268
1635
|
status("+", success(`Index updated with ${total} pages.`));
|
|
@@ -1276,7 +1643,7 @@ async function scanWikiPages(dirPath) {
|
|
|
1276
1643
|
}
|
|
1277
1644
|
const scanned = [];
|
|
1278
1645
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
1279
|
-
const content = await safeReadFile(
|
|
1646
|
+
const content = await safeReadFile(path12.join(dirPath, file));
|
|
1280
1647
|
const { meta } = parseFrontmatter(content);
|
|
1281
1648
|
scanned.push({ slug: file.replace(/\.md$/, ""), meta });
|
|
1282
1649
|
}
|
|
@@ -1296,12 +1663,12 @@ function stripWikilinks(text) {
|
|
|
1296
1663
|
function buildIndexContent(concepts, queries) {
|
|
1297
1664
|
const lines = ["# Knowledge Wiki", "", "## Concepts", ""];
|
|
1298
1665
|
for (const page of concepts) {
|
|
1299
|
-
lines.push(`- **[[${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
|
|
1666
|
+
lines.push(`- **[[${page.slug}|${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
|
|
1300
1667
|
}
|
|
1301
1668
|
if (queries.length > 0) {
|
|
1302
1669
|
lines.push("", "## Saved Queries", "");
|
|
1303
1670
|
for (const page of queries) {
|
|
1304
|
-
lines.push(`- **[[${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
|
|
1671
|
+
lines.push(`- **[[${page.slug}|${page.title}]]** \u2014 ${stripWikilinks(page.summary)}`);
|
|
1305
1672
|
}
|
|
1306
1673
|
}
|
|
1307
1674
|
const total = concepts.length + queries.length;
|
|
@@ -1313,7 +1680,7 @@ function buildIndexContent(concepts, queries) {
|
|
|
1313
1680
|
|
|
1314
1681
|
// src/compiler/obsidian.ts
|
|
1315
1682
|
import { readdir as readdir4 } from "fs/promises";
|
|
1316
|
-
import
|
|
1683
|
+
import path13 from "path";
|
|
1317
1684
|
// NOTE(review): presumably the minimum number of words a title needs before an
// abbreviation is generated — the consumer is outside this view; confirm.
var ABBREVIATION_MIN_WORDS = 3;
|
|
1318
1685
|
// Space-padded conjunctions. NOTE(review): presumably used to build swapped-order
// title variants — the consumer is outside this view; confirm.
var SWAP_CONJUNCTIONS = [" and ", " or "];
|
|
1319
1686
|
function addObsidianMeta(frontmatter, conceptTitle, tags) {
|
|
@@ -1355,11 +1722,11 @@ function generateAbbreviation(title) {
|
|
|
1355
1722
|
return abbreviation;
|
|
1356
1723
|
}
|
|
1357
1724
|
/**
 * Regenerates the map-of-content file for a wiki.
 *
 * Loads the concept pages under `CONCEPTS_DIR`, groups them by tag, renders the
 * groups, and atomically writes the result to `MOC_FILE` under `root`.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<void>}
 */
async function generateMOC(root) {
  const pages = await loadConceptPages(path13.join(root, CONCEPTS_DIR));
  const content = buildMOCContent(groupPagesByTag(pages));
  const mocPath = path13.join(root, MOC_FILE);
  await atomicWrite(mocPath, content);
}
|
|
1364
1731
|
async function loadConceptPages(conceptsPath) {
|
|
1365
1732
|
let files;
|
|
@@ -1371,13 +1738,14 @@ async function loadConceptPages(conceptsPath) {
|
|
|
1371
1738
|
const pages = [];
|
|
1372
1739
|
for (const file of files) {
|
|
1373
1740
|
if (!file.endsWith(".md")) continue;
|
|
1374
|
-
const content = await safeReadFile(
|
|
1741
|
+
const content = await safeReadFile(path13.join(conceptsPath, file));
|
|
1375
1742
|
if (!content) continue;
|
|
1376
1743
|
const { meta } = parseFrontmatter(content);
|
|
1377
1744
|
if (meta.orphaned) continue;
|
|
1378
|
-
const
|
|
1745
|
+
const slug = file.replace(/\.md$/, "");
|
|
1746
|
+
const title = typeof meta.title === "string" ? meta.title : slug;
|
|
1379
1747
|
const tags = Array.isArray(meta.tags) ? meta.tags : [];
|
|
1380
|
-
pages.push({ title, tags });
|
|
1748
|
+
pages.push({ slug, title, tags });
|
|
1381
1749
|
}
|
|
1382
1750
|
return pages;
|
|
1383
1751
|
}
|
|
@@ -1385,21 +1753,21 @@ function groupPagesByTag(pages) {
|
|
|
1385
1753
|
const groups = /* @__PURE__ */ new Map();
|
|
1386
1754
|
for (const page of pages) {
|
|
1387
1755
|
if (page.tags.length === 0) {
|
|
1388
|
-
appendToGroup(groups, "Uncategorized", page
|
|
1756
|
+
appendToGroup(groups, "Uncategorized", page);
|
|
1389
1757
|
continue;
|
|
1390
1758
|
}
|
|
1391
1759
|
for (const tag of page.tags) {
|
|
1392
|
-
appendToGroup(groups, tag, page
|
|
1760
|
+
appendToGroup(groups, tag, page);
|
|
1393
1761
|
}
|
|
1394
1762
|
}
|
|
1395
1763
|
return groups;
|
|
1396
1764
|
}
|
|
1397
|
-
function appendToGroup(groups, key,
|
|
1765
|
+
/**
 * Adds `page` to the list stored under `key`, creating the list on first use.
 *
 * @param {Map<string, object[]>} groups - Map of group key to pages; mutated in place.
 * @param {string} key - Group key (a tag name).
 * @param {object} page - Page record to append.
 * @returns {void}
 */
function appendToGroup(groups, key, page) {
  const bucket = groups.get(key);
  if (!bucket) {
    groups.set(key, [page]);
    return;
  }
  bucket.push(page);
}
|
|
1405
1773
|
function buildMOCContent(tagGroups) {
|
|
@@ -1410,10 +1778,10 @@ function buildMOCContent(tagGroups) {
|
|
|
1410
1778
|
return a.localeCompare(b);
|
|
1411
1779
|
});
|
|
1412
1780
|
for (const tag of sortedTags) {
|
|
1413
|
-
const
|
|
1781
|
+
const pages = tagGroups.get(tag) ?? [];
|
|
1414
1782
|
lines.push(`## ${tag}`, "");
|
|
1415
|
-
for (const
|
|
1416
|
-
lines.push(`- [[${title}]]`);
|
|
1783
|
+
for (const page of pages.sort((a, b) => a.title.localeCompare(b.title))) {
|
|
1784
|
+
lines.push(`- [[${page.slug}|${page.title}]]`);
|
|
1417
1785
|
}
|
|
1418
1786
|
lines.push("");
|
|
1419
1787
|
}
|
|
@@ -1421,9 +1789,9 @@ function buildMOCContent(tagGroups) {
|
|
|
1421
1789
|
}
|
|
1422
1790
|
|
|
1423
1791
|
// src/utils/embeddings.ts
|
|
1424
|
-
import { readFile as
|
|
1425
|
-
import { existsSync as
|
|
1426
|
-
import
|
|
1792
|
+
import { readFile as readFile8, readdir as readdir5 } from "fs/promises";
|
|
1793
|
+
import { existsSync as existsSync4 } from "fs";
|
|
1794
|
+
import path14 from "path";
|
|
1427
1795
|
function cosineSimilarity(a, b) {
|
|
1428
1796
|
if (a.length !== b.length || a.length === 0) return 0;
|
|
1429
1797
|
let dot = 0;
|
|
@@ -1446,18 +1814,23 @@ function findTopK(queryVec, store, k) {
|
|
|
1446
1814
|
return scored.slice(0, k).map((item) => item.entry);
|
|
1447
1815
|
}
|
|
1448
1816
|
/**
 * Reads the embedding store saved under `root`.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object|null>} Parsed JSON contents of `EMBEDDINGS_FILE`,
 *   or `null` when the file does not exist.
 * @throws {SyntaxError} When the file exists but is not valid JSON.
 */
async function readEmbeddingStore(root) {
  const storePath = path14.join(root, EMBEDDINGS_FILE);
  if (existsSync4(storePath)) {
    const serialized = await readFile8(storePath, "utf-8");
    return JSON.parse(serialized);
  }
  return null;
}
|
|
1454
1822
|
/**
 * Persists the embedding store as pretty-printed JSON (2-space indent) via
 * `atomicWrite`.
 *
 * @param {string} root - Wiki root directory.
 * @param {object} store - Embedding store to serialize.
 * @returns {Promise<void>}
 */
async function writeEmbeddingStore(root, store) {
  const storePath = path14.join(root, EMBEDDINGS_FILE);
  const serialized = JSON.stringify(store, null, 2);
  await atomicWrite(storePath, serialized);
}
|
|
1458
1826
|
async function findRelevantPages(root, question) {
|
|
1459
1827
|
const store = await readEmbeddingStore(root);
|
|
1460
1828
|
if (!store || store.entries.length === 0) return [];
|
|
1829
|
+
const activeModel = resolveEmbeddingModel();
|
|
1830
|
+
if (store.model !== activeModel) {
|
|
1831
|
+
warnStaleEmbeddingStore(store.model, activeModel);
|
|
1832
|
+
return [];
|
|
1833
|
+
}
|
|
1461
1834
|
const queryVec = await getProvider().embed(question);
|
|
1462
1835
|
return findTopK(queryVec, store, EMBEDDING_TOP_K).map((entry) => ({
|
|
1463
1836
|
slug: entry.slug,
|
|
@@ -1468,7 +1841,7 @@ async function findRelevantPages(root, question) {
|
|
|
1468
1841
|
async function collectPageRecords(root) {
|
|
1469
1842
|
const records = [];
|
|
1470
1843
|
for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
|
|
1471
|
-
const absDir =
|
|
1844
|
+
const absDir = path14.join(root, dir);
|
|
1472
1845
|
let files;
|
|
1473
1846
|
try {
|
|
1474
1847
|
files = await readdir5(absDir);
|
|
@@ -1476,7 +1849,7 @@ async function collectPageRecords(root) {
|
|
|
1476
1849
|
continue;
|
|
1477
1850
|
}
|
|
1478
1851
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
1479
|
-
const content = await safeReadFile(
|
|
1852
|
+
const content = await safeReadFile(path14.join(absDir, file));
|
|
1480
1853
|
const { meta } = parseFrontmatter(content);
|
|
1481
1854
|
if (meta.orphaned || typeof meta.title !== "string") continue;
|
|
1482
1855
|
records.push({
|
|
@@ -1510,8 +1883,25 @@ async function embedPages(records, slugsToEmbed) {
|
|
|
1510
1883
|
}
|
|
1511
1884
|
return fresh;
|
|
1512
1885
|
}
|
|
1886
|
+
// Stored→active model pairs already warned about, so each pair is reported once
// per process.
var warnedStaleModels = /* @__PURE__ */ new Set();
/**
 * Emits a one-time warning that the embedding store was built with a different
 * model than the one currently active.
 *
 * @param {string} storedModel - Model name recorded in the embedding store.
 * @param {string} activeModel - Model name currently resolved.
 * @returns {void}
 */
function warnStaleEmbeddingStore(storedModel, activeModel) {
  const pairKey = `${storedModel}\u2192${activeModel}`;
  if (!warnedStaleModels.has(pairKey)) {
    warnedStaleModels.add(pairKey);
    const message = `Embedding store was built with "${storedModel}" but active embedding model is "${activeModel}". Falling back to full-index selection. Run 'llmwiki compile' to rebuild embeddings.`;
    status("!", warn(message));
  }
}
|
|
1513
1898
|
/**
 * Determines which embedding model name is currently in effect.
 *
 * A non-empty, trimmed `LLMWIKI_EMBEDDING_MODEL` environment variable is
 * honored only for the "openai" and "ollama" providers. Otherwise the model
 * comes from `EMBEDDING_MODELS` for the active provider, falling back to the
 * `anthropic` entry.
 *
 * @returns {string} Embedding model name.
 */
function resolveEmbeddingModel() {
  const providerName = getActiveProviderName();
  const configuredModel = process.env.LLMWIKI_EMBEDDING_MODEL?.trim();
  const honorsOverride = providerName === "openai" || providerName === "ollama";
  if (honorsOverride && configuredModel) {
    return configuredModel;
  }
  return EMBEDDING_MODELS[providerName] ?? EMBEDDING_MODELS.anthropic;
}
|
|
1516
1906
|
function mergeEntries(existing, fresh, liveSlugs) {
|
|
1517
1907
|
const bySlug = /* @__PURE__ */ new Map();
|
|
@@ -1526,13 +1916,15 @@ function mergeEntries(existing, fresh, liveSlugs) {
|
|
|
1526
1916
|
async function updateEmbeddings(root, changedSlugs) {
|
|
1527
1917
|
const records = await collectPageRecords(root);
|
|
1528
1918
|
const liveSlugs = new Set(records.map((r) => r.slug));
|
|
1529
|
-
const
|
|
1919
|
+
const embeddingModel = resolveEmbeddingModel();
|
|
1530
1920
|
const existingStore = await readEmbeddingStore(root);
|
|
1531
|
-
const
|
|
1532
|
-
|
|
1921
|
+
const modelChanged = Boolean(existingStore && existingStore.model !== embeddingModel);
|
|
1922
|
+
const toEmbed = new Set(changedSlugs.filter((slug) => liveSlugs.has(slug)));
|
|
1923
|
+
const previousEntries = modelChanged ? [] : existingStore?.entries ?? [];
|
|
1924
|
+
if (!existingStore || modelChanged) {
|
|
1533
1925
|
for (const record of records) toEmbed.add(record.slug);
|
|
1534
1926
|
}
|
|
1535
|
-
if (toEmbed.size === 0 && previousEntries.every((e) => liveSlugs.has(e.slug))) {
|
|
1927
|
+
if (!modelChanged && toEmbed.size === 0 && previousEntries.every((e) => liveSlugs.has(e.slug))) {
|
|
1536
1928
|
return;
|
|
1537
1929
|
}
|
|
1538
1930
|
const freshEntries = await embedPages(records, toEmbed);
|
|
@@ -1540,7 +1932,7 @@ async function updateEmbeddings(root, changedSlugs) {
|
|
|
1540
1932
|
const dimensions = mergedEntries[0]?.vector.length ?? 0;
|
|
1541
1933
|
const store = {
|
|
1542
1934
|
version: 1,
|
|
1543
|
-
model:
|
|
1935
|
+
model: embeddingModel,
|
|
1544
1936
|
dimensions,
|
|
1545
1937
|
entries: mergedEntries
|
|
1546
1938
|
};
|
|
@@ -1548,62 +1940,604 @@ async function updateEmbeddings(root, changedSlugs) {
|
|
|
1548
1940
|
status("*", dim(`Embeddings updated (${mergedEntries.length} pages).`));
|
|
1549
1941
|
}
|
|
1550
1942
|
|
|
1551
|
-
// src/compiler/
|
|
1552
|
-
import
|
|
1553
|
-
|
|
1554
|
-
|
|
1943
|
+
// src/compiler/candidates.ts
|
|
1944
|
+
import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
|
|
1945
|
+
import { existsSync as existsSync5 } from "fs";
|
|
1946
|
+
import path15 from "path";
|
|
1947
|
+
import { randomBytes } from "crypto";
|
|
1948
|
+
// Number of random bytes in a candidate id suffix (rendered as twice as many hex chars).
var ID_SUFFIX_BYTES = 4;
// File extension used for candidate files on disk.
var CANDIDATE_EXT = ".json";
/**
 * Builds a candidate id of the form `<slug>-<random hex>`.
 *
 * @param {string} slug - Slug of the page the candidate is for.
 * @returns {string} The slug followed by a dash and `ID_SUFFIX_BYTES` random
 *   bytes in hexadecimal.
 */
function buildCandidateId(slug) {
  const randomHex = randomBytes(ID_SUFFIX_BYTES).toString("hex");
  return [slug, randomHex].join("-");
}
|
|
1954
|
+
/**
 * Returns the path of the pending-candidate file for `id`.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {string} `<root>/<CANDIDATES_DIR>/<id><CANDIDATE_EXT>`.
 */
function candidatePath(root, id) {
  const fileName = `${id}${CANDIDATE_EXT}`;
  return path15.join(root, CANDIDATES_DIR, fileName);
}
|
|
1957
|
+
/**
 * Returns the path of the archived-candidate file for `id`.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {string} `<root>/<CANDIDATES_ARCHIVE_DIR>/<id><CANDIDATE_EXT>`.
 */
function archivePath(root, id) {
  const fileName = `${id}${CANDIDATE_EXT}`;
  return path15.join(root, CANDIDATES_ARCHIVE_DIR, fileName);
}
|
|
1960
|
+
/**
 * Persists a drafted page as a review candidate.
 *
 * The candidate gets a fresh id derived from the draft's slug and a
 * `generatedAt` ISO timestamp of the current time. `sourceStates` and
 * `schemaViolations` are copied only when the draft carries truthy values.
 *
 * @param {string} root - Wiki root directory.
 * @param {object} draft - Draft with `title`, `slug`, `summary`, `sources`,
 *   `body` and optional `sourceStates` / `schemaViolations`.
 * @returns {Promise<object>} The candidate object that was written.
 */
async function writeCandidate(root, draft) {
  const { title, slug, summary, sources, body, sourceStates, schemaViolations } = draft;
  const candidate = {
    id: buildCandidateId(slug),
    title,
    slug,
    summary,
    sources,
    body,
    generatedAt: new Date().toISOString()
  };
  // Optional fields are appended last so the serialized key order is stable.
  if (sourceStates) {
    candidate.sourceStates = sourceStates;
  }
  if (schemaViolations) {
    candidate.schemaViolations = schemaViolations;
  }
  const target = candidatePath(root, candidate.id);
  await atomicWrite(target, JSON.stringify(candidate, null, 2));
  return candidate;
}
|
|
1556
|
-
|
|
1557
|
-
|
|
1975
|
+
/**
 * Reports an error through `status` and marks the process as failed.
 *
 * Sets `process.exitCode` to 1 instead of exiting, so the caller keeps control.
 *
 * @param {string} message - Error text to display.
 * @returns {null} Always `null`, so callers can `return failWithError(...)`.
 */
function failWithError(message) {
  const formatted = error(message);
  status("!", formatted);
  process.exitCode = 1;
  return null;
}
|
|
1559
|
-
async function
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
};
|
|
1980
|
+
/**
 * Loads a candidate, reporting a "not found" failure when it cannot be read.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The candidate, or `null` after the failure
 *   has been reported via `failWithError`.
 */
async function loadCandidateOrFail(root, id) {
  const candidate = await readCandidate(root, id);
  return candidate ? candidate : failWithError(`Candidate not found: ${id}`);
}
|
|
1985
|
+
/**
 * Loads a candidate and, when it is missing, reports that another process
 * removed it during review.
 *
 * NOTE(review): the name implies the caller already holds a lock; no locking
 * happens in this function.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The candidate, or `null` after the failure
 *   has been reported via `failWithError`.
 */
async function loadCandidateUnderLockOrFail(root, id) {
  const candidate = await readCandidate(root, id);
  return candidate
    ? candidate
    : failWithError(`Candidate ${id} was removed by another process during review.`);
}
|
|
1992
|
+
/**
 * Reads and validates a single candidate file.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The parsed candidate, or `null` when the file
 *   yields no content, is not valid JSON, or fails `isValidCandidate`.
 */
async function readCandidate(root, id) {
  const raw = await safeReadFile(candidatePath(root, id));
  if (!raw) {
    return null;
  }
  try {
    const parsed = JSON.parse(raw);
    return isValidCandidate(parsed) ? parsed : null;
  } catch {
    // Malformed JSON is treated the same as a missing candidate.
    return null;
  }
}
|
|
2003
|
+
/**
 * Structural check for a parsed candidate record.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {boolean} `true` when `value` is a non-null object whose `id`,
 *   `title`, `slug` and `body` are strings and whose `sources` is an array.
 */
function isValidCandidate(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  const requiredStringFields = ["id", "title", "slug", "body"];
  const hasStringFields = requiredStringFields.every((field) => typeof value[field] === "string");
  return hasStringFields && Array.isArray(value.sources);
}
|
|
2008
|
+
/**
 * Lists every readable, valid candidate under `CANDIDATES_DIR`, oldest first.
 *
 * Entries that are not regular files, lack the candidate extension, or fail to
 * load through `readCandidate` are skipped.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} Candidates sorted by `generatedAt` ascending;
 *   an empty array when the candidates directory does not exist.
 */
async function listCandidates(root) {
  const dir = path15.join(root, CANDIDATES_DIR);
  if (!existsSync5(dir)) return [];
  const entries = await readdir6(dir, { withFileTypes: true });
  const candidates = [];
  for (const entry of entries) {
    if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
    const id = entry.name.slice(0, -CANDIDATE_EXT.length);
    const candidate = await readCandidate(root, id);
    if (candidate) candidates.push(candidate);
  }
  // `isValidCandidate` does not check `generatedAt`, so a hand-edited or legacy
  // file may lack it. Treat a missing timestamp as "" (sorts first) rather than
  // letting one bad file crash the whole listing.
  const timestampOf = (candidate) => typeof candidate.generatedAt === "string" ? candidate.generatedAt : "";
  candidates.sort((a, b) => timestampOf(a).localeCompare(timestampOf(b)));
  return candidates;
}
|
|
2022
|
+
/**
 * Counts the candidates returned by `listCandidates`.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<number>} Number of valid candidates.
 */
async function countCandidates(root) {
  const { length } = await listCandidates(root);
  return length;
}
|
|
2026
|
+
/**
 * Deletes the pending candidate file for `id`.
 *
 * The file is unlinked directly instead of being checked with `existsSync`
 * first: a check-then-unlink sequence can race with another process removing
 * the same candidate and surface as an unhandled ENOENT.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<boolean>} `true` when the file was removed, `false` when it
 *   did not exist.
 * @throws Any filesystem error other than ENOENT.
 */
async function deleteCandidate(root, id) {
  try {
    await unlink2(candidatePath(root, id));
    return true;
  } catch (err) {
    if (err?.code === "ENOENT") return false;
    throw err;
  }
}
|
|
2032
|
+
/**
 * Moves a pending candidate file into the archive directory.
 *
 * A plain rename is tried first. If it fails (for example across filesystems),
 * the file is copied and the original removed. The original is deleted only
 * after its content was actually read, so a failed read can never leave an
 * empty archive file with the source gone.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<boolean>} `true` when archived, `false` when the candidate
 *   file does not exist.
 * @throws The original rename error when the copy fallback cannot read the source.
 */
async function archiveCandidate(root, id) {
  const sourcePath = candidatePath(root, id);
  if (!existsSync5(sourcePath)) return false;
  const target = archivePath(root, id);
  await mkdir5(path15.dirname(target), { recursive: true });
  try {
    await rename3(sourcePath, target);
  } catch (renameError) {
    const raw = await safeReadFile(sourcePath);
    // safeReadFile appears to yield a falsy value on failure (other callers
    // test `!content`); with nothing to copy, surface the rename error instead
    // of destroying the candidate.
    if (!raw) throw renameError;
    await writeFile4(target, raw, "utf-8");
    await unlink2(sourcePath);
  }
  return true;
}
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
2046
|
+
|
|
2047
|
+
// src/linter/rules.ts
|
|
2048
|
+
import { readdir as readdir7, readFile as readFile9 } from "fs/promises";
|
|
2049
|
+
import { existsSync as existsSync6 } from "fs";
|
|
2050
|
+
import path16 from "path";
|
|
2051
|
+
// Trimmed body length (in characters) below which the empty-page lint rule flags a page.
var MIN_BODY_LENGTH = 50;
|
|
2052
|
+
// Matches `[[...]]` wikilinks; capture group 1 is the text between the brackets.
// Global, as required by String.prototype.matchAll.
var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g;
|
|
2053
|
+
// Matches `^[...]` citation markers; capture group 1 is the marker content.
// Global flag: `.test()` / `.exec()` advance `lastIndex`, so callers using those
// must reset it between inputs.
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
2054
|
+
/**
 * Finds every match of `pattern` in `content`, line by line.
 *
 * @param {string} content - Text to scan; split on "\n".
 * @param {RegExp} pattern - Global regex with at least one capture group
 *   (`matchAll` throws a TypeError for a non-global regex).
 * @returns {{ captured: string, line: number }[]} First capture group of each
 *   match with its 1-based line number, in document order.
 */
function findMatchesInContent(content, pattern) {
  return content.split("\n").flatMap((text, index) =>
    Array.from(text.matchAll(pattern), (match) => ({ captured: match[1], line: index + 1 }))
  );
}
|
|
1582
|
-
async function
|
|
1583
|
-
|
|
1584
|
-
const
|
|
1585
|
-
const
|
|
1586
|
-
const
|
|
1587
|
-
|
|
1588
|
-
const
|
|
1589
|
-
|
|
1590
|
-
return
|
|
1591
|
-
})
|
|
2065
|
+
/**
 * Reads every `.md` file directly inside `dirPath`.
 *
 * @param {string} dirPath - Directory to scan (not recursive).
 * @returns {Promise<{ filePath: string, content: string }[]>} One record per
 *   markdown file, in directory-listing order; an empty array when the
 *   directory does not exist.
 */
async function readMarkdownFiles(dirPath) {
  if (!existsSync6(dirPath)) {
    return [];
  }
  const readOne = async (filePath) => ({
    filePath,
    content: await readFile9(filePath, "utf-8")
  });
  const pending = [];
  for (const name of await readdir7(dirPath)) {
    if (name.endsWith(".md")) {
      // Start every read without awaiting so the files load concurrently.
      pending.push(readOne(path16.join(dirPath, name)));
    }
  }
  return Promise.all(pending);
}
|
|
1595
|
-
async function
|
|
1596
|
-
|
|
1597
|
-
|
|
2078
|
+
/**
 * Collects the markdown pages of a wiki: concept pages first, then saved queries.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<{ filePath: string, content: string }[]>} Pages from
 *   `CONCEPTS_DIR` followed by pages from `QUERIES_DIR`.
 */
async function collectAllPages(root) {
  const pages = [];
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
    const found = await readMarkdownFiles(path16.join(root, dir));
    pages.push(...found);
  }
  return pages;
}
|
|
2083
|
+
/**
 * Builds the set of page slugs from page file names.
 *
 * @param {{ filePath: string }[]} pages - Page records.
 * @returns {Set<string>} Lower-cased base names with a trailing `.md` removed.
 */
function buildPageSlugSet(pages) {
  const toSlug = ({ filePath }) => path16.basename(filePath, ".md").toLowerCase();
  return new Set(pages.map(toSlug));
}
|
|
2091
|
+
/**
 * Lint rule: reports wikilinks that do not resolve to an existing page.
 *
 * Both plain `[[Title]]` links and aliased `[[slug|Title]]` links are handled.
 * For aliased links only the part before the first `|` names the target; the
 * rest is display text and must not take part in slug resolution, otherwise
 * every aliased link would be reported as broken.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} One `broken-wikilink` error per unresolved
 *   link, with the file and 1-based line number.
 */
async function checkBrokenWikilinks(root) {
  const pages = await collectAllPages(root);
  const existingSlugs = buildPageSlugSet(pages);
  const results = [];
  for (const page of pages) {
    for (const { captured, line } of findMatchesInContent(page.content, WIKILINK_PATTERN2)) {
      // Drop the display text of `[[target|label]]` before slugifying.
      const [target] = captured.split("|");
      const linkSlug = slugify(target);
      if (existingSlugs.has(linkSlug)) continue;
      results.push({
        rule: "broken-wikilink",
        severity: "error",
        file: page.filePath,
        message: `Broken wikilink [[${captured}]] \u2014 no matching page found`,
        line
      });
    }
  }
  return results;
}
|
|
2111
|
+
/**
 * Lint rule: reports pages whose frontmatter has `orphaned: true`.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} One `orphaned-page` warning per flagged page.
 */
async function checkOrphanedPages(root) {
  const pages = await collectAllPages(root);
  return pages
    .filter((page) => parseFrontmatter(page.content).meta.orphaned === true)
    .map((page) => ({
      rule: "orphaned-page",
      severity: "warning",
      file: page.filePath,
      message: `Page is marked as orphaned`
    }));
}
|
|
2127
|
+
/**
 * Lint rule: reports pages without a usable `summary` in their frontmatter.
 *
 * A summary counts as missing when it is falsy or a whitespace-only string.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} One `missing-summary` warning per page.
 */
async function checkMissingSummaries(root) {
  const results = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { summary } = parseFrontmatter(content).meta;
    const isBlankString = typeof summary === "string" && summary.trim() === "";
    if (summary && !isBlankString) {
      continue;
    }
    results.push({
      rule: "missing-summary",
      severity: "warning",
      file: filePath,
      message: `Page has no summary in frontmatter`
    });
  }
  return results;
}
|
|
2145
|
+
/**
 * Lint rule: reports pages that share a title.
 *
 * Titles are compared after lower-casing and trimming; pages without a
 * non-empty string title are ignored.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} One `duplicate-concept` error per file involved
 *   in a clash, naming the other files with the same title.
 */
async function checkDuplicateConcepts(root) {
  const pages = await collectAllPages(root);
  const filesByTitle = /* @__PURE__ */ new Map();
  for (const { filePath, content } of pages) {
    const { title } = parseFrontmatter(content).meta;
    if (typeof title !== "string" || title === "") {
      continue;
    }
    const key = title.toLowerCase().trim();
    if (!filesByTitle.has(key)) {
      filesByTitle.set(key, []);
    }
    filesByTitle.get(key).push(filePath);
  }
  const results = [];
  for (const [title, files] of filesByTitle) {
    if (files.length < 2) {
      continue;
    }
    for (const file of files) {
      const others = files.filter((f) => f !== file).join(", ");
      results.push({
        rule: "duplicate-concept",
        severity: "error",
        file,
        message: `Duplicate title "${title}" \u2014 also in ${others}`
      });
    }
  }
  return results;
}
|
|
2171
|
+
/**
 * Lint rule: reports titled pages whose trimmed body is shorter than
 * `MIN_BODY_LENGTH` characters. Pages without a non-blank title are not reported.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} One `empty-page` warning per short page.
 */
async function checkEmptyPages(root) {
  const pages = await collectAllPages(root);
  const isTitledButShort = (page) => {
    const { meta, body } = parseFrontmatter(page.content);
    const hasTitle = typeof meta.title === "string" && meta.title.trim() !== "";
    const isBodyEmpty = body.trim().length < MIN_BODY_LENGTH;
    return hasTitle && isBodyEmpty;
  };
  return pages.filter(isTitledButShort).map((page) => ({
    rule: "empty-page",
    severity: "warning",
    file: page.filePath,
    message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
  }));
}
|
|
2189
|
+
/**
 * Removes a trailing span suffix from a citation entry.
 *
 * @param {string} entry - Citation entry such as `file.md:10-20` or `file.md#L3`.
 * @returns {string} Everything before the first `:` or `#`, or the whole entry
 *   when neither character is present.
 */
function stripSpanSuffix(entry) {
  const cutAt = entry.search(/[:#]/);
  return cutAt === -1 ? entry : entry.slice(0, cutAt);
}
|
|
2196
|
+
/**
 * Lint rule: reports pages whose provenance confidence is below
 * `LOW_CONFIDENCE_THRESHOLD`. Pages with no confidence value are skipped.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} One `low-confidence` warning per flagged page.
 */
async function checkLowConfidencePages(root) {
  const results = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { confidence } = parseProvenanceMetadata(parseFrontmatter(content).meta);
    const isAcceptable = confidence === void 0 || confidence >= LOW_CONFIDENCE_THRESHOLD;
    if (!isAcceptable) {
      results.push({
        rule: "low-confidence",
        severity: "warning",
        file: filePath,
        message: `Page confidence ${confidence.toFixed(2)} is below ${LOW_CONFIDENCE_THRESHOLD}`
      });
    }
  }
  return results;
}
|
|
2212
|
+
/**
 * Lint rule: reports pages whose provenance metadata has a non-empty
 * `contradictedBy` list.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} One `contradicted-page` warning per flagged
 *   page, listing the slugs of the `contradictedBy` references.
 */
async function checkContradictedPages(root) {
  const results = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { contradictedBy } = parseProvenanceMetadata(parseFrontmatter(content).meta);
    if (contradictedBy && contradictedBy.length !== 0) {
      const slugs = contradictedBy.map((ref) => ref.slug).join(", ");
      results.push({
        rule: "contradicted-page",
        severity: "warning",
        file: filePath,
        message: `Page contradicts: ${slugs}`
      });
    }
  }
  return results;
}
|
|
2229
|
+
/**
 * Lint rule: reports pages with more uncited inferred paragraphs than
 * `MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS`.
 *
 * The count comes from the `inferredParagraphs` provenance value when present;
 * otherwise it is computed from the body by `countUncitedProseParagraphs`.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} One `excess-inferred-paragraphs` warning per
 *   flagged page.
 */
async function checkInferredWithoutCitations(root) {
  const results = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { meta, body } = parseFrontmatter(content);
    const { inferredParagraphs } = parseProvenanceMetadata(meta);
    const inferred = inferredParagraphs ?? countUncitedProseParagraphs(body);
    if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) {
      continue;
    }
    results.push({
      rule: "excess-inferred-paragraphs",
      severity: "warning",
      file: filePath,
      message: `Page has ${inferred} inferred paragraphs without citations (max ${MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS})`
    });
  }
  return results;
}
|
|
2246
|
+
// A block counts as prose only when its first character is an ASCII letter;
// this excludes headings, list items, tables, code fences and the like.
var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
/**
 * Counts prose paragraphs that contain no `^[...]` citation marker.
 *
 * Paragraphs are blocks separated by blank lines. Blocks that are empty or do
 * not start with an ASCII letter are ignored.
 *
 * @param {string} body - Page body text.
 * @returns {number} Number of prose paragraphs without a citation.
 */
function countUncitedProseParagraphs(body) {
  let uncited = 0;
  for (const rawBlock of body.split(/\n\s*\n/)) {
    const paragraph = rawBlock.trim();
    if (paragraph.length === 0 || !PROSE_PARAGRAPH_LEAD.test(paragraph)) {
      continue;
    }
    const hasCitation = CITATION_PATTERN.test(paragraph);
    // CITATION_PATTERN is global, so `.test()` moves lastIndex; rewind it so
    // the next paragraph is scanned from its start.
    CITATION_PATTERN.lastIndex = 0;
    if (!hasCitation) {
      uncited += 1;
    }
  }
  return uncited;
}
|
|
2263
|
+
// Citation with a colon span: `file:START` or `file:START-END`.
// Group 1 is the start line, group 2 the optional end line.
var COLON_SPAN_PATTERN = /^[^:#]+:(\d+)(?:-(\d+))?$/;
|
|
2264
|
+
// Citation with a hash span: `file#LSTART` or `file#LSTART-LEND`.
// Group 1 is the start line, group 2 the optional end line.
var HASH_SPAN_PATTERN = /^[^:#]+#L(\d+)(?:-L(\d+))?$/;
|
|
2265
|
+
/**
 * Lint rule: reports every page with fewer wikilinks than its kind requires.
 *
 * The per-page logic lives in `checkPageCrossLinks`; this function applies it
 * to all pages, so both entry points produce identical diagnostics.
 *
 * @param {string} root - Wiki root directory.
 * @param {object} schema - Loaded schema (`kinds[kind].minWikilinks`, `defaultKind`).
 * @returns {Promise<object[]>} `schema-cross-link-minimum` warnings in page order.
 */
async function checkSchemaCrossLinks(root, schema) {
  const pages = await collectAllPages(root);
  return pages.flatMap((page) => checkPageCrossLinks(page.content, page.filePath, schema));
}
|
|
2284
|
+
/**
 * Checks a single page against the minimum-wikilink rule of its page kind.
 *
 * @param {string} content - Full page text (frontmatter plus body).
 * @param {string} filePath - Path reported in the `file` field of the result.
 * @param {object} schema - Schema whose `kinds[kind].minWikilinks` gives the minimum.
 * @returns {object[]} Empty when the kind requires no links or the page has
 *   enough; otherwise a single "schema-cross-link-minimum" warning.
 */
function checkPageCrossLinks(content, filePath, schema) {
  const parsed = parseFrontmatter(content);
  const kind = resolvePageKind(parsed.meta.kind, schema);
  const required = schema.kinds[kind].minWikilinks;
  // A non-positive minimum disables the rule for this kind.
  if (required <= 0) return [];
  const found = countWikilinks(parsed.body);
  if (found >= required) return [];
  const violation = {
    rule: "schema-cross-link-minimum",
    severity: "warning",
    file: filePath,
    message: `Page kind "${kind}" requires at least ${required} [[wikilinks]] but only ${found} found.`
  };
  return [violation];
}
|
|
2300
|
+
/**
 * Extracts the line span from a citation entry.
 *
 * Two suffix forms are recognised, tried in this order: the colon form
 * (COLON_SPAN_PATTERN) and the hash form (HASH_SPAN_PATTERN). When the suffix
 * names a single line, `end` equals `start`.
 *
 * @param {string} entry - One citation entry.
 * @returns {{start: number, end: number} | null} The span, or null when the
 *   entry matches neither pattern.
 */
function parseLineRange(entry) {
  for (const pattern of [COLON_SPAN_PATTERN, HASH_SPAN_PATTERN]) {
    const match = pattern.exec(entry);
    if (match === null) continue;
    const start = Number(match[1]);
    // The second capture group is absent for a single-line span.
    const end = match[2] === void 0 ? start : Number(match[2]);
    return { start, end };
  }
  return null;
}
|
|
2315
|
+
/**
 * Counts the lines in a text.
 *
 * A final newline terminates the last line rather than starting a new one, so
 * "a\nb\n" and "a\nb" both have two lines. Counting the empty segment after a
 * trailing newline would let a span one line past the end of the file pass a
 * bounds check.
 *
 * @param {string} content - Text to measure.
 * @returns {number} Number of lines; 0 for the empty string.
 */
function countLines(content) {
  if (content.length === 0) return 0;
  const segments = content.split("\n").length;
  return content.endsWith("\n") ? segments - 1 : segments;
}
|
|
2319
|
+
/**
 * Lint rule: scans every collected page for citation markers and gathers the
 * problems that collectBrokenForMarker reports for each of them.
 *
 * A single line-count cache is shared across all pages and markers.
 *
 * @param {string} root - Project root; sources are looked up under SOURCES_DIR.
 * @returns {Promise<object[]>} Accumulated lint results.
 */
async function checkBrokenCitations(root) {
  const pages = await collectAllPages(root);
  const sourcesDir = path16.join(root, SOURCES_DIR);
  const results = [];
  const lineCountCache = /* @__PURE__ */ new Map();
  for (const { filePath, content } of pages) {
    const markers = findMatchesInContent(content, CITATION_PATTERN);
    for (const marker of markers) {
      // Awaited one at a time so results are appended in document order.
      await collectBrokenForMarker(marker.captured, marker.line, filePath, sourcesDir, lineCountCache, results);
    }
  }
  return results;
}
|
|
2331
|
+
/**
 * Validates every comma-separated entry of one citation marker and appends a
 * "broken-citation" error to `out` for each entry that either names a source
 * file that does not exist or carries a line span ending past the last line
 * of that file.
 *
 * @param {string} captured - Text captured inside the citation marker.
 * @param {number} line - Page line on which the marker appears.
 * @param {string} pageFile - Path of the page, reported in the `file` field.
 * @param {string} sourcesDir - Directory against which cited filenames are resolved.
 * @param {Map<string, number>} lineCountCache - Cache handed to resolveLineCount.
 * @param {object[]} out - Result list; mutated in place.
 * @returns {Promise<void>}
 */
async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, lineCountCache, out) {
  const report = (message) => {
    out.push({
      rule: "broken-citation",
      severity: "error",
      file: pageFile,
      message,
      line
    });
  };
  const entries = captured.split(",").map((part) => part.trim()).filter((entry) => entry.length > 0);
  for (const entry of entries) {
    const filename = stripSpanSuffix(entry);
    const citedPath = path16.join(sourcesDir, filename);
    if (!existsSync6(citedPath)) {
      report(`Broken citation ^[${filename}] \u2014 source file not found`);
      continue;
    }
    const range = parseLineRange(entry);
    // Entries without a line span only need the file to exist.
    if (range === null) continue;
    const lineCount = await resolveLineCount(citedPath, filename, lineCountCache);
    if (range.end > lineCount) {
      report(`Claim-level span ^[${entry}] is out of bounds (source has only ${lineCount} lines)`);
    }
  }
}
|
|
2360
|
+
/**
 * Returns the line count of a cited source, reading the file only the first
 * time a given filename is requested.
 *
 * @param {string} citedPath - Path handed to safeReadFile.
 * @param {string} filename - Cache key.
 * @param {Map<string, number>} cache - Filename-to-line-count cache; updated on a miss.
 * @returns {Promise<number>} Line count as computed by countLines.
 */
async function resolveLineCount(citedPath, filename, cache) {
  const known = cache.get(filename);
  if (known === void 0) {
    const total = countLines(await safeReadFile(citedPath));
    cache.set(filename, total);
    return total;
  }
  return known;
}
|
|
2368
|
+
/**
 * Lint rule: reports one "malformed-claim-citation" error per malformed
 * comma-separated entry found inside any citation marker of any page.
 *
 * The message quotes the whole marker text, not just the offending entry.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} Lint results in page, then marker, then entry order.
 */
async function checkMalformedClaimCitations(root) {
  const results = [];
  for (const page of await collectAllPages(root)) {
    for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
      const malformed = captured.split(",").filter((part) => isMalformedCitationEntry(part));
      const findings = malformed.map(() => ({
        rule: "malformed-claim-citation",
        severity: "error",
        file: page.filePath,
        message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
        line
      }));
      results.push(...findings);
    }
  }
  return results;
}
|
|
2387
|
+
|
|
2388
|
+
// src/compiler/page-renderer.ts
|
|
2389
|
+
import { readdir as readdir8 } from "fs/promises";
|
|
2390
|
+
import path17 from "path";
|
|
2391
|
+
|
|
2392
|
+
// src/compiler/provenance.ts
|
|
2393
|
+
/**
 * Copies the provenance fields that are present on a concept onto a
 * frontmatter field map.
 *
 * `confidence` and `inferredParagraphs` are copied only when they are
 * numbers, `provenanceState` only when truthy, and `contradictedBy` only when
 * it is a non-empty list.
 *
 * @param {object} fields - Frontmatter fields; mutated in place.
 * @param {object} concept - Concept carrying the optional provenance data.
 * @returns {void}
 */
function addProvenanceMeta(fields, concept) {
  const { confidence, provenanceState, contradictedBy, inferredParagraphs } = concept;
  if (typeof confidence === "number") fields.confidence = confidence;
  if (provenanceState) fields.provenanceState = provenanceState;
  if (contradictedBy && contradictedBy.length > 0) fields.contradictedBy = contradictedBy;
  if (typeof inferredParagraphs === "number") fields.inferredParagraphs = inferredParagraphs;
}
|
|
2407
|
+
/**
 * Emits a single "!" status line listing the slugs a concept is contradicted
 * by. Does nothing when the concept has no contradiction references.
 *
 * @param {string} conceptTitle - Title quoted in the warning.
 * @param {object} concept - Concept whose `contradictedBy` entries each expose a `slug`.
 * @returns {void}
 */
function reportContradictionWarnings(conceptTitle, concept) {
  const { contradictedBy } = concept;
  if (!contradictedBy || contradictedBy.length === 0) return;
  const slugList = contradictedBy.map((ref) => ref.slug).join(", ");
  const message = `Contradiction reported on "${conceptTitle}" \u2014 conflicts with: ${slugList}`;
  status("!", warn(message));
}
|
|
2416
|
+
|
|
2417
|
+
// src/compiler/page-renderer.ts
|
|
2418
|
+
var RELATED_PAGE_CONTEXT_LIMIT = 5;
|
|
2419
|
+
/**
 * Produces the full text of a merged concept page: frontmatter, a blank line,
 * the model-written body, and a trailing newline.
 *
 * The existing page (if any) and related pages are loaded first and handed to
 * buildPagePrompt; the frontmatter is built only after the model call returns.
 * Contradiction warnings for the concept are reported as a side effect.
 *
 * @param {string} root - Project root.
 * @param {object} entry - Merged extraction entry (`slug`, `concept`, `combinedContent`, ...).
 * @param {object} schema - Schema forwarded to buildMergedFrontmatter.
 * @returns {Promise<string>} Complete page text.
 */
async function renderMergedPageContent(root, entry, schema) {
  const { concept, slug, combinedContent } = entry;
  const title = concept.concept;
  const existingPage = await safeReadFile(path17.join(root, CONCEPTS_DIR, `${slug}.md`));
  const relatedPages = await loadRelatedPages(root, slug);
  const pageBody = await callClaude({
    system: buildPagePrompt(title, combinedContent, existingPage, relatedPages),
    messages: [
      { role: "user", content: `Write the wiki page for "${title}".` }
    ]
  });
  const frontmatter = buildMergedFrontmatter(entry, existingPage, schema);
  reportContradictionWarnings(title, concept);
  return `${frontmatter}\n\n${pageBody}\n`;
}
|
|
2442
|
+
/**
 * Builds the frontmatter block for a merged concept page.
 *
 * `createdAt` is carried over from the existing page when that page has a
 * non-empty string `createdAt`; otherwise it is the current time. `updatedAt`
 * is always the current time and `kind` is the schema's default kind.
 *
 * @param {object} entry - Merged extraction entry (`concept`, `sourceFiles`).
 * @param {string} existingPage - Current page text; falsy when there is none.
 * @param {object} schema - Schema providing `defaultKind`.
 * @returns {*} Whatever buildFrontmatter returns for the assembled fields.
 */
function buildMergedFrontmatter(entry, existingPage, schema) {
  const now = (/* @__PURE__ */ new Date()).toISOString();
  const existing = existingPage ? parseFrontmatter(existingPage) : null;
  const previousCreatedAt = existing?.meta.createdAt;
  let createdAt = now;
  if (previousCreatedAt && typeof previousCreatedAt === "string") {
    createdAt = previousCreatedAt;
  }
  const { concept, sourceFiles } = entry;
  const frontmatterFields = {
    title: concept.concept,
    summary: concept.summary,
    sources: sourceFiles,
    kind: schema.defaultKind,
    createdAt,
    updatedAt: now
  };
  addObsidianMeta(frontmatterFields, concept.concept, concept.tags ?? []);
  addProvenanceMeta(frontmatterFields, concept);
  return buildFrontmatter(frontmatterFields);
}
|
|
2458
|
+
/**
 * Loads up to RELATED_PAGE_CONTEXT_LIMIT other concept pages to use as
 * context, joined with a horizontal-rule separator.
 *
 * Pages that are unreadable, empty, or marked `orphaned` in their frontmatter
 * do not count towards the limit: the directory listing is walked until
 * enough usable pages have been collected. Applying the limit before this
 * filtering would let unusable pages take up context slots.
 *
 * @param {string} root - Project root; pages are read from CONCEPTS_DIR beneath it.
 * @param {string} excludeSlug - Slug of the page being written; its own file is skipped.
 * @returns {Promise<string>} Joined page contents, or "" when the directory
 *   cannot be listed or holds no usable page.
 */
async function loadRelatedPages(root, excludeSlug) {
  const conceptsPath = path17.join(root, CONCEPTS_DIR);
  let files;
  try {
    files = await readdir8(conceptsPath);
  } catch {
    // Best effort: a directory that cannot be listed simply means no context.
    return "";
  }
  const ownFile = `${excludeSlug}.md`;
  const contents = [];
  for (const f of files) {
    if (contents.length >= RELATED_PAGE_CONTEXT_LIMIT) break;
    if (!f.endsWith(".md") || f === ownFile) continue;
    const content = await safeReadFile(path17.join(conceptsPath, f));
    if (!content) continue;
    const { meta } = parseFrontmatter(content);
    if (meta.orphaned) continue;
    contents.push(content);
  }
  return contents.join("\n\n---\n\n");
}
|
|
2477
|
+
|
|
2478
|
+
// src/compiler/index.ts
|
|
2479
|
+
import pLimit from "p-limit";
|
|
2480
|
+
/**
 * Creates a fresh compile result with all counters at zero and all lists empty.
 *
 * @returns {{compiled: number, skipped: number, deleted: number, concepts: Array, pages: Array, errors: Array}}
 *   A new object with new arrays on every call.
 */
function emptyCompileResult() {
  const counters = { compiled: 0, skipped: 0, deleted: 0 };
  return { ...counters, concepts: [], pages: [], errors: [] };
}
|
|
2483
|
+
/**
 * Runs a compile and discards the result object that compileAndReport
 * produces, so callers of this entry point receive nothing.
 *
 * @param {string} root - Project root.
 * @param {object} [options={}] - Compile options, forwarded unchanged.
 * @returns {Promise<void>}
 */
async function compile(root, options = {}) {
  // Awaited without `return` on purpose: the result is intentionally dropped.
  await compileAndReport(root, options);
}
|
|
2486
|
+
/**
 * Runs the compile pipeline while holding the lock, and returns its result.
 *
 * When acquireLock yields a falsy value the pipeline is not run: an error
 * status is printed and an otherwise empty result carrying a single error
 * message is returned. The lock is released even when the pipeline throws.
 *
 * @param {string} root - Project root.
 * @param {object} [options={}] - Compile options forwarded to runCompilePipeline.
 * @returns {Promise<object>} Compile result.
 */
async function compileAndReport(root, options = {}) {
  header("llmwiki compile");
  if (!(await acquireLock(root))) {
    status("!", error("Could not acquire lock. Try again later."));
    const blocked = emptyCompileResult();
    blocked.errors = ["Could not acquire .llmwiki/lock \u2014 another compile is in progress."];
    return blocked;
  }
  try {
    return await runCompilePipeline(root, options);
  } finally {
    await releaseLock(root);
  }
}
|
|
2502
|
+
/**
 * Groups detected changes by status.
 *
 * "new" and "changed" entries go to `toCompile`, "deleted" to `deleted`, and
 * "unchanged" to `unchanged`. Entries with any other status are dropped.
 * Relative order within each bucket follows the input order.
 *
 * @param {Array<{status: string}>} changes - Detected changes.
 * @returns {{toCompile: Array, deleted: Array, unchanged: Array}}
 */
function bucketChanges(changes) {
  const toCompile = [];
  const deleted = [];
  const unchanged = [];
  changes.forEach((change) => {
    switch (change.status) {
      case "new":
      case "changed":
        toCompile.push(change);
        break;
      case "deleted":
        deleted.push(change);
        break;
      case "unchanged":
        unchanged.push(change);
        break;
      default:
        break;
    }
  });
  return { toCompile, deleted, unchanged };
}
|
|
2509
|
+
/**
 * Merges the extractions and generates one page per merged entry, with the
 * page jobs throttled through a pLimit limiter sized by COMPILE_CONCURRENCY.
 *
 * Source states are built only when `options.review` is set; otherwise an
 * empty object is passed along.
 *
 * @param {string} root - Project root.
 * @param {object[]} extractions - Per-source extraction results.
 * @param {Set<string>} frozenSlugs - Slugs handed to mergeExtractions.
 * @param {object} schema - Schema forwarded to generateMergedPage.
 * @param {object} options - Compile options (`review` is read here).
 * @returns {Promise<{pages: object[], errors: Array, candidates: Array}>}
 *   `pages` holds the merged entries in merge order; `errors` and `candidates`
 *   are filled in completion order.
 */
async function generatePagesPhase(root, extractions, frozenSlugs, schema, options) {
  const merged = mergeExtractions(extractions, frozenSlugs);
  let sourceStates = {};
  if (options.review) {
    sourceStates = await buildExtractionSourceStates(root, extractions);
  }
  const limit = pLimit(COMPILE_CONCURRENCY);
  const errors = [];
  const candidates = [];
  const renderOne = async (entry) => {
    const outcome = await generateMergedPage(root, entry, schema, options, sourceStates);
    if (outcome.error) errors.push(outcome.error);
    if (outcome.candidateId) candidates.push(outcome.candidateId);
    return entry;
  };
  const jobs = merged.map((entry) => limit(() => renderOne(entry)));
  const pages = await Promise.all(jobs);
  return { pages, errors, candidates };
}
|
|
2525
|
+
/**
 * Persists source state for every extraction that yielded at least one
 * concept, one source at a time. Extractions with no concepts are skipped.
 *
 * @param {string} root - Project root.
 * @param {object[]} extractions - Per-source extraction results.
 * @returns {Promise<void>}
 */
async function persistExtractionStates(root, extractions) {
  for (const { concepts, sourcePath, sourceFile } of extractions) {
    if (concepts.length !== 0) {
      await persistSourceState(root, sourcePath, sourceFile, concepts);
    }
  }
}
|
|
1601
|
-
function summarizeCompile(buckets, generation, extractions) {
|
|
2531
|
+
function summarizeCompile(buckets, generation, extractions, options) {
|
|
1602
2532
|
header("Compilation complete");
|
|
1603
2533
|
status("\u2713", success(
|
|
1604
2534
|
`${buckets.toCompile.length} compiled, ${buckets.unchanged.length} skipped, ${buckets.deleted.length} deleted`
|
|
1605
2535
|
));
|
|
1606
|
-
if (
|
|
2536
|
+
if (options.review && generation.candidates.length > 0) {
|
|
2537
|
+
status("?", info(
|
|
2538
|
+
`${generation.candidates.length} candidate(s) awaiting review \u2014 run \`llmwiki review list\``
|
|
2539
|
+
));
|
|
2540
|
+
} else if (buckets.toCompile.length > 0) {
|
|
1607
2541
|
status("\u2192", dim('Next: llmwiki query "your question here"'));
|
|
1608
2542
|
}
|
|
1609
2543
|
const errors = [...generation.errors];
|
|
@@ -1612,7 +2546,7 @@ function summarizeCompile(buckets, generation, extractions) {
|
|
|
1612
2546
|
errors.push(`No concepts extracted from ${result.sourceFile}`);
|
|
1613
2547
|
}
|
|
1614
2548
|
}
|
|
1615
|
-
|
|
2549
|
+
const baseResult = {
|
|
1616
2550
|
compiled: buckets.toCompile.length,
|
|
1617
2551
|
skipped: buckets.unchanged.length,
|
|
1618
2552
|
deleted: buckets.deleted.length,
|
|
@@ -1620,30 +2554,58 @@ function summarizeCompile(buckets, generation, extractions) {
|
|
|
1620
2554
|
pages: generation.pages.map((entry) => entry.slug),
|
|
1621
2555
|
errors
|
|
1622
2556
|
};
|
|
2557
|
+
if (options.review) {
|
|
2558
|
+
baseResult.candidates = generation.candidates;
|
|
2559
|
+
}
|
|
2560
|
+
return baseResult;
|
|
1623
2561
|
}
|
|
1624
|
-
async function runCompilePipeline(root) {
|
|
2562
|
+
async function runCompilePipeline(root, options) {
|
|
2563
|
+
const schema = await loadSchema(root);
|
|
2564
|
+
reportSchemaStatus(schema);
|
|
1625
2565
|
const state = await readState(root);
|
|
1626
2566
|
const changes = await detectChanges(root, state);
|
|
1627
2567
|
augmentWithAffectedSources(changes, findAffectedSources(state, changes));
|
|
1628
2568
|
const buckets = bucketChanges(changes);
|
|
1629
2569
|
if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
|
|
1630
2570
|
status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
|
|
2571
|
+
if (!options.review) {
|
|
2572
|
+
const emptyGeneration = { pages: [], errors: [], candidates: [] };
|
|
2573
|
+
await generateSeedPages(root, schema, emptyGeneration);
|
|
2574
|
+
await finalizeWiki(root, emptyGeneration.pages);
|
|
2575
|
+
return {
|
|
2576
|
+
...emptyCompileResult(),
|
|
2577
|
+
skipped: buckets.unchanged.length,
|
|
2578
|
+
errors: emptyGeneration.errors
|
|
2579
|
+
};
|
|
2580
|
+
}
|
|
1631
2581
|
return { ...emptyCompileResult(), skipped: buckets.unchanged.length };
|
|
1632
2582
|
}
|
|
1633
2583
|
printChangesSummary(changes);
|
|
1634
|
-
|
|
2584
|
+
if (!options.review) {
|
|
2585
|
+
await markDeletedAsOrphaned(root, buckets.deleted, state);
|
|
2586
|
+
}
|
|
1635
2587
|
const frozenSlugs = findFrozenSlugs(state, changes);
|
|
1636
2588
|
reportFrozenSlugs(frozenSlugs);
|
|
1637
2589
|
const extractions = await runExtractionPhases(root, buckets.toCompile, state, changes);
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
2590
|
+
if (!options.review) {
|
|
2591
|
+
await freezeFailedExtractions(root, extractions, frozenSlugs);
|
|
2592
|
+
}
|
|
2593
|
+
const generation = await generatePagesPhase(root, extractions, frozenSlugs, schema, options);
|
|
2594
|
+
if (!options.review) {
|
|
2595
|
+
await persistExtractionStates(root, extractions);
|
|
2596
|
+
if (frozenSlugs.size > 0) {
|
|
2597
|
+
await orphanUnownedFrozenPages(root, frozenSlugs);
|
|
2598
|
+
}
|
|
2599
|
+
await persistFrozenSlugs(root, frozenSlugs, extractions);
|
|
2600
|
+
await generateSeedPages(root, schema, generation);
|
|
2601
|
+
await finalizeWiki(root, generation.pages);
|
|
2602
|
+
}
|
|
2603
|
+
return summarizeCompile(buckets, generation, extractions, options);
|
|
2604
|
+
}
|
|
2605
|
+
function reportSchemaStatus(schema) {
|
|
2606
|
+
if (schema.loadedFrom) {
|
|
2607
|
+
status("i", dim(`Schema: ${schema.loadedFrom}`));
|
|
1643
2608
|
}
|
|
1644
|
-
await persistFrozenSlugs(root, frozenSlugs, extractions);
|
|
1645
|
-
await finalizeWiki(root, generation.pages);
|
|
1646
|
-
return summarizeCompile(buckets, generation, extractions);
|
|
1647
2609
|
}
|
|
1648
2610
|
function augmentWithAffectedSources(changes, affected) {
|
|
1649
2611
|
for (const file of affected) {
|
|
@@ -1705,9 +2667,9 @@ function printChangesSummary(changes) {
|
|
|
1705
2667
|
}
|
|
1706
2668
|
async function extractForSource(root, sourceFile) {
|
|
1707
2669
|
status("*", info(`Extracting: ${sourceFile}`));
|
|
1708
|
-
const sourcePath =
|
|
1709
|
-
const sourceContent = await
|
|
1710
|
-
const existingIndex = await safeReadFile(
|
|
2670
|
+
const sourcePath = path18.join(root, SOURCES_DIR, sourceFile);
|
|
2671
|
+
const sourceContent = await readFile10(sourcePath, "utf-8");
|
|
2672
|
+
const existingIndex = await safeReadFile(path18.join(root, INDEX_FILE));
|
|
1711
2673
|
const concepts = await extractConcepts(sourceContent, existingIndex);
|
|
1712
2674
|
if (concepts.length > 0) {
|
|
1713
2675
|
const names = concepts.map((c) => c.concept).join(", ");
|
|
@@ -1715,6 +2677,26 @@ async function extractForSource(root, sourceFile) {
|
|
|
1715
2677
|
}
|
|
1716
2678
|
return { sourceFile, sourcePath, sourceContent, concepts };
|
|
1717
2679
|
}
|
|
2680
|
+
/**
 * Combines the provenance metadata of two extractions of the same concept
 * into a new concept object; neither input is modified.
 *
 * - `confidence`: the lower of the two when both are numbers, else the incoming number.
 * - `provenanceState`: always "merged".
 * - `contradictedBy`: existing references followed by incoming references
 *   whose slug has not been seen yet; `undefined` when the list is empty.
 * - `inferredParagraphs`: the higher of the two when both are numbers, else the incoming number.
 *
 * @param {object} existing - Concept already stored for the slug.
 * @param {object} incoming - Concept from a further source.
 * @returns {object} Shallow copy of `existing` with the reconciled fields.
 */
function reconcileConceptMetadata(existing, incoming) {
  const combineNumbers = (prior, next, combine) => typeof prior === "number" ? combine(prior, next) : next;
  const reconciled = { ...existing };
  if (typeof incoming.confidence === "number") {
    reconciled.confidence = combineNumbers(existing.confidence, incoming.confidence, Math.min);
  }
  reconciled.provenanceState = "merged";
  const refs = [...(existing.contradictedBy ?? [])];
  const seenSlugs = new Set(refs.map((ref) => ref.slug));
  for (const ref of incoming.contradictedBy ?? []) {
    if (seenSlugs.has(ref.slug)) continue;
    seenSlugs.add(ref.slug);
    refs.push(ref);
  }
  // The key is always assigned, even when there is nothing to list.
  reconciled.contradictedBy = refs.length === 0 ? void 0 : refs;
  if (typeof incoming.inferredParagraphs === "number") {
    reconciled.inferredParagraphs = combineNumbers(existing.inferredParagraphs, incoming.inferredParagraphs, Math.max);
  }
  return reconciled;
}
|
|
1718
2700
|
function mergeExtractions(extractions, frozenSlugs) {
|
|
1719
2701
|
const bySlug = /* @__PURE__ */ new Map();
|
|
1720
2702
|
for (const result of extractions) {
|
|
@@ -1724,6 +2706,7 @@ function mergeExtractions(extractions, frozenSlugs) {
|
|
|
1724
2706
|
if (frozenSlugs.has(slug)) continue;
|
|
1725
2707
|
const existing = bySlug.get(slug);
|
|
1726
2708
|
if (existing) {
|
|
2709
|
+
existing.concept = reconcileConceptMetadata(existing.concept, concept);
|
|
1727
2710
|
existing.sourceFiles.push(result.sourceFile);
|
|
1728
2711
|
existing.combinedContent += `
|
|
1729
2712
|
|
|
@@ -1744,68 +2727,86 @@ ${result.sourceContent}`
|
|
|
1744
2727
|
}
|
|
1745
2728
|
return Array.from(bySlug.values());
|
|
1746
2729
|
}
|
|
1747
|
-
async function generateMergedPage(root, entry) {
|
|
1748
|
-
const
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
2730
|
+
/**
 * Renders the page for one merged entry, then either stores it as a review
 * candidate (when `options.review` is set) or writes it to the concepts
 * directory.
 *
 * @param {string} root - Project root.
 * @param {object} entry - Merged extraction entry.
 * @param {object} schema - Schema forwarded to rendering and review.
 * @param {object} options - Compile options (`review` is read here).
 * @param {object} sourceStates - Source states forwarded to persistReviewCandidate.
 * @returns {Promise<object>} The result of persistReviewCandidate in review
 *   mode; otherwise `{ error }`, where `error` is undefined when
 *   writePageIfValid returned null or undefined.
 */
async function generateMergedPage(root, entry, schema, options, sourceStates) {
  const fullPage = await renderMergedPageContent(root, entry, schema);
  if (!options.review) {
    const pagePath = path18.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
    const writeError = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
    return { error: writeError ?? void 0 };
  }
  return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
}
|
|
2739
|
+
/**
 * Stores a rendered page as a review candidate instead of writing it to the wiki.
 *
 * The page is checked with checkPageCrossLinks under a virtual
 * `wiki/concepts/<slug>.md` path; any violations are attached to the
 * candidate, and the field is left undefined when there are none.
 *
 * @param {string} root - Project root.
 * @param {object} entry - Merged extraction entry.
 * @param {string} fullPage - Rendered page text, stored as the candidate body.
 * @param {object} sourceStates - Source states, narrowed to this entry's sources.
 * @param {object} schema - Schema used for the cross-link check.
 * @returns {Promise<{candidateId: *}>} Id of the written candidate.
 */
async function persistReviewCandidate(root, entry, fullPage, sourceStates, schema) {
  const { slug, concept, sourceFiles } = entry;
  const violations = checkPageCrossLinks(fullPage, `wiki/concepts/${slug}.md`, schema);
  const payload = {
    title: concept.concept,
    slug,
    summary: concept.summary,
    sources: sourceFiles,
    body: fullPage,
    sourceStates: pickStatesForSources(sourceStates, sourceFiles),
    schemaViolations: violations.length > 0 ? violations : void 0
  };
  const candidate = await writeCandidate(root, payload);
  status("?", info(`Candidate ready: ${candidate.id} (${slug})`));
  return { candidateId: candidate.id };
}
|
|
2754
|
+
/**
 * Generates every seed page declared in the schema, one after another, and
 * appends any error returned for a seed to `generation.errors`.
 *
 * @param {string} root - Project root.
 * @param {object} schema - Schema providing `seedPages`.
 * @param {{errors: Array}} generation - Generation summary; `errors` is mutated in place.
 * @returns {Promise<void>}
 */
async function generateSeedPages(root, schema, generation) {
  const { seedPages } = schema;
  for (let index = 0; index < seedPages.length; index += 1) {
    const seedError = await generateSingleSeedPage(root, schema, seedPages[index]);
    if (seedError) generation.errors.push(seedError);
  }
}
|
|
2761
|
+
async function generateSingleSeedPage(root, schema, seed) {
|
|
2762
|
+
const slug = slugify(seed.title);
|
|
2763
|
+
const pagePath = path18.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
2764
|
+
const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
|
|
2765
|
+
const rule = schema.kinds[seed.kind];
|
|
2766
|
+
const system = buildSeedPagePrompt(seed, rule, relatedContent);
|
|
1757
2767
|
const pageBody = await callClaude({
|
|
1758
2768
|
system,
|
|
1759
|
-
messages: [
|
|
1760
|
-
{ role: "user", content: `Write the wiki page for "${entry.concept.concept}".` }
|
|
1761
|
-
]
|
|
2769
|
+
messages: [{ role: "user", content: `Write the ${seed.kind} page titled "${seed.title}".` }]
|
|
1762
2770
|
});
|
|
1763
2771
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1764
|
-
const existing =
|
|
1765
|
-
const
|
|
1766
|
-
const
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
2772
|
+
const existing = await safeReadFile(pagePath);
|
|
2773
|
+
const existingMeta = existing ? parseFrontmatter(existing).meta : null;
|
|
2774
|
+
const createdAt = typeof existingMeta?.createdAt === "string" ? existingMeta.createdAt : now;
|
|
2775
|
+
const typedFields = {
|
|
2776
|
+
title: seed.title,
|
|
2777
|
+
summary: seed.summary,
|
|
2778
|
+
sources: [],
|
|
2779
|
+
kind: seed.kind,
|
|
1770
2780
|
createdAt,
|
|
1771
2781
|
updatedAt: now
|
|
1772
2782
|
};
|
|
1773
|
-
|
|
2783
|
+
const frontmatterFields = { ...typedFields };
|
|
2784
|
+
addObsidianMeta(frontmatterFields, seed.title, []);
|
|
1774
2785
|
const frontmatter = buildFrontmatter(frontmatterFields);
|
|
1775
|
-
|
|
2786
|
+
return await writePageIfValid(pagePath, `${frontmatter}
|
|
1776
2787
|
|
|
1777
2788
|
${pageBody}
|
|
1778
|
-
|
|
1779
|
-
return await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
2789
|
+
`, seed.title);
|
|
1780
2790
|
}
|
|
1781
|
-
async function
|
|
1782
|
-
|
|
1783
|
-
const rawOutput = await callClaude({
|
|
1784
|
-
system,
|
|
1785
|
-
messages: [{ role: "user", content: "Extract the key concepts from this source." }],
|
|
1786
|
-
tools: [CONCEPT_EXTRACTION_TOOL]
|
|
1787
|
-
});
|
|
1788
|
-
return parseConcepts(rawOutput);
|
|
1789
|
-
}
|
|
1790
|
-
async function loadRelatedPages(root, excludeSlug) {
|
|
1791
|
-
const conceptsPath = path13.join(root, CONCEPTS_DIR);
|
|
1792
|
-
let files;
|
|
1793
|
-
try {
|
|
1794
|
-
files = await readdir6(conceptsPath);
|
|
1795
|
-
} catch {
|
|
1796
|
-
return "";
|
|
1797
|
-
}
|
|
1798
|
-
const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, 5);
|
|
2791
|
+
async function loadSeedRelatedPages(root, slugs) {
|
|
2792
|
+
if (slugs.length === 0) return "";
|
|
1799
2793
|
const contents = [];
|
|
1800
|
-
for (const
|
|
1801
|
-
const
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
if (meta.orphaned) continue;
|
|
1805
|
-
contents.push(content);
|
|
2794
|
+
for (const slug of slugs) {
|
|
2795
|
+
const pagePath = path18.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
2796
|
+
const content = await safeReadFile(pagePath);
|
|
2797
|
+
if (content) contents.push(content);
|
|
1806
2798
|
}
|
|
1807
2799
|
return contents.join("\n\n---\n\n");
|
|
1808
2800
|
}
|
|
2801
|
+
/**
 * Asks the model to extract concepts from a source, offering it the
 * concept-extraction tool, and parses the raw output.
 *
 * @param {string} sourceContent - Text of the source document.
 * @param {string} existingIndex - Current index text, passed to the prompt builder.
 * @returns {Promise<*>} Whatever parseConcepts returns for the model output.
 */
async function extractConcepts(sourceContent, existingIndex) {
  const request = {
    system: buildExtractionPrompt(sourceContent, existingIndex),
    messages: [{ role: "user", content: "Extract the key concepts from this source." }],
    tools: [CONCEPT_EXTRACTION_TOOL]
  };
  const rawOutput = await callClaude(request);
  return parseConcepts(rawOutput);
}
|
|
1809
2810
|
async function writePageIfValid(pagePath, content, conceptTitle) {
|
|
1810
2811
|
if (!validateWikiPage(content)) {
|
|
1811
2812
|
status("!", warn(`Invalid page for "${conceptTitle}" \u2014 skipped.`));
|
|
@@ -1833,20 +2834,20 @@ async function persistSourceState(root, sourcePath, sourceFile, concepts) {
|
|
|
1833
2834
|
}
|
|
1834
2835
|
|
|
1835
2836
|
// src/commands/compile.ts
|
|
1836
|
-
async function compileCommand() {
|
|
1837
|
-
if (!
|
|
2837
|
+
async function compileCommand(options = {}) {
|
|
2838
|
+
if (!existsSync7(SOURCES_DIR)) {
|
|
1838
2839
|
status(
|
|
1839
2840
|
"!",
|
|
1840
2841
|
warn("No sources found. Run `llmwiki ingest <url>` first.")
|
|
1841
2842
|
);
|
|
1842
2843
|
return;
|
|
1843
2844
|
}
|
|
1844
|
-
await compile(process.cwd());
|
|
2845
|
+
await compile(process.cwd(), options);
|
|
1845
2846
|
}
|
|
1846
2847
|
|
|
1847
2848
|
// src/commands/query.ts
|
|
1848
|
-
import { existsSync as
|
|
1849
|
-
import
|
|
2849
|
+
import { existsSync as existsSync8 } from "fs";
|
|
2850
|
+
import path19 from "path";
|
|
1850
2851
|
var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
|
|
1851
2852
|
var PAGE_SELECTION_TOOL = {
|
|
1852
2853
|
name: "select_pages",
|
|
@@ -1901,7 +2902,7 @@ async function selectRelevantPages(root, question) {
|
|
|
1901
2902
|
const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
|
|
1902
2903
|
return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2 };
|
|
1903
2904
|
}
|
|
1904
|
-
const indexContent = await safeReadFile(
|
|
2905
|
+
const indexContent = await safeReadFile(path19.join(root, INDEX_FILE));
|
|
1905
2906
|
const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
|
|
1906
2907
|
return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning };
|
|
1907
2908
|
}
|
|
@@ -1919,7 +2920,7 @@ async function loadSelectedPages(root, slugs) {
|
|
|
1919
2920
|
for (const slug of slugs) {
|
|
1920
2921
|
let content = "";
|
|
1921
2922
|
for (const dir of PAGE_DIRS) {
|
|
1922
|
-
const candidate = await safeReadFile(
|
|
2923
|
+
const candidate = await safeReadFile(path19.join(root, dir, `${slug}.md`));
|
|
1923
2924
|
if (!candidate) continue;
|
|
1924
2925
|
const { meta } = parseFrontmatter(candidate);
|
|
1925
2926
|
if (meta.orphaned) continue;
|
|
@@ -1955,7 +2956,7 @@ function summarizeAnswer(answer) {
|
|
|
1955
2956
|
}
|
|
1956
2957
|
async function saveQueryPage(root, question, answer) {
|
|
1957
2958
|
const slug = slugify(question);
|
|
1958
|
-
const filePath =
|
|
2959
|
+
const filePath = path19.join(root, QUERIES_DIR, `${slug}.md`);
|
|
1959
2960
|
const frontmatter = buildFrontmatter({
|
|
1960
2961
|
title: question,
|
|
1961
2962
|
summary: summarizeAnswer(answer),
|
|
@@ -1981,7 +2982,7 @@ ${answer}
|
|
|
1981
2982
|
return slug;
|
|
1982
2983
|
}
|
|
1983
2984
|
async function generateAnswer(root, question, options = {}) {
|
|
1984
|
-
if (!
|
|
2985
|
+
if (!existsSync8(path19.join(root, INDEX_FILE))) {
|
|
1985
2986
|
throw new Error("Wiki index not found. Run `llmwiki compile` first.");
|
|
1986
2987
|
}
|
|
1987
2988
|
const { pages, reasoning } = await selectRelevantPages(root, question);
|
|
@@ -1998,7 +2999,7 @@ async function generateAnswer(root, question, options = {}) {
|
|
|
1998
2999
|
return { answer, selectedPages: pages, reasoning, saved };
|
|
1999
3000
|
}
|
|
2000
3001
|
async function queryCommand(root, question, options) {
|
|
2001
|
-
if (!
|
|
3002
|
+
if (!existsSync8(path19.join(root, INDEX_FILE))) {
|
|
2002
3003
|
status("!", error("Wiki index not found. Run `llmwiki compile` first."));
|
|
2003
3004
|
return;
|
|
2004
3005
|
}
|
|
@@ -2026,12 +3027,12 @@ async function queryCommand(root, question, options) {
|
|
|
2026
3027
|
|
|
2027
3028
|
// src/commands/watch.ts
|
|
2028
3029
|
import { watch as chokidarWatch } from "chokidar";
|
|
2029
|
-
import { existsSync as
|
|
2030
|
-
import
|
|
3030
|
+
import { existsSync as existsSync9 } from "fs";
|
|
3031
|
+
import path20 from "path";
|
|
2031
3032
|
var DEBOUNCE_MS = 500;
|
|
2032
3033
|
async function watchCommand() {
|
|
2033
|
-
const sourcesPath =
|
|
2034
|
-
if (!
|
|
3034
|
+
const sourcesPath = path20.resolve(SOURCES_DIR);
|
|
3035
|
+
if (!existsSync9(sourcesPath)) {
|
|
2035
3036
|
status(
|
|
2036
3037
|
"!",
|
|
2037
3038
|
warn("No sources/ directory found. Run `llmwiki ingest <url>` first.")
|
|
@@ -2065,7 +3066,7 @@ async function watchCommand() {
|
|
|
2065
3066
|
const scheduleCompile = (eventPath, event) => {
|
|
2066
3067
|
status(
|
|
2067
3068
|
"~",
|
|
2068
|
-
dim(`${event}: ${
|
|
3069
|
+
dim(`${event}: ${path20.basename(eventPath)}`)
|
|
2069
3070
|
);
|
|
2070
3071
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
2071
3072
|
debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
|
|
@@ -2079,186 +3080,30 @@ async function watchCommand() {
|
|
|
2079
3080
|
});
|
|
2080
3081
|
}
|
|
2081
3082
|
|
|
2082
|
-
// src/linter/rules.ts
|
|
2083
|
-
import { readdir as readdir7, readFile as readFile9 } from "fs/promises";
|
|
2084
|
-
import { existsSync as existsSync7 } from "fs";
|
|
2085
|
-
import path16 from "path";
|
|
2086
|
-
var MIN_BODY_LENGTH = 50;
|
|
2087
|
-
var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
|
|
2088
|
-
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
2089
|
-
/**
 * Finds every match of a pattern in a text, line by line.
 *
 * @param {string} content - Text to scan; split on "\n".
 * @param {RegExp} pattern - Pattern handed to String.prototype.matchAll, which
 *   requires the global flag.
 * @returns {Array<{captured: string, line: number}>} First capture group and
 *   1-based line number of each match, in document order.
 */
function findMatchesInContent(content, pattern) {
  return content.split("\n").flatMap((text, index) =>
    Array.from(text.matchAll(pattern), (match) => ({ captured: match[1], line: index + 1 }))
  );
}
|
|
2100
|
-
/**
 * Reads every `.md` file directly inside a directory.
 *
 * @param {string} dirPath - Directory to list.
 * @returns {Promise<Array<{filePath: string, content: string}>>} One record per
 *   markdown file, in listing order; empty when the directory does not exist.
 */
async function readMarkdownFiles(dirPath) {
  if (!existsSync7(dirPath)) return [];
  const names = await readdir7(dirPath);
  const reads = [];
  for (const name of names) {
    if (!name.endsWith(".md")) continue;
    const filePath = path16.join(dirPath, name);
    // Reads are started together and awaited as a group below.
    reads.push(readFile9(filePath, "utf-8").then((content) => ({ filePath, content })));
  }
  return Promise.all(reads);
}
|
|
2113
|
-
/**
 * Collects the markdown pages of the concepts directory followed by those of
 * the queries directory.
 *
 * @param {string} root - Project root.
 * @returns {Promise<Array<{filePath: string, content: string}>>} Concept pages
 *   first, then query pages.
 */
async function collectAllPages(root) {
  const pages = [];
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
    const found = await readMarkdownFiles(path16.join(root, dir));
    pages.push(...found);
  }
  return pages;
}
|
|
2118
|
-
/**
 * Builds the set of page slugs, taken as the lower-cased file name of each
 * page without its `.md` extension.
 *
 * @param {Array<{filePath: string}>} pages - Pages to index.
 * @returns {Set<string>} Lower-cased base names.
 */
function buildPageSlugSet(pages) {
  return new Set(pages.map((page) => path16.basename(page.filePath, ".md").toLowerCase()));
}
|
|
2126
|
-
/**
 * Lint rule: reports every [[wikilink]] whose slugified target matches no
 * collected page.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "broken-wikilink" error per unresolved link.
 */
async function checkBrokenWikilinks(root) {
  const pages = await collectAllPages(root);
  const knownSlugs = buildPageSlugSet(pages);
  const results = [];
  for (const { filePath, content } of pages) {
    for (const link of findMatchesInContent(content, WIKILINK_PATTERN)) {
      if (knownSlugs.has(slugify(link.captured))) continue;
      results.push({
        rule: "broken-wikilink",
        severity: "error",
        file: filePath,
        message: `Broken wikilink [[${link.captured}]] \u2014 no matching page found`,
        line: link.line
      });
    }
  }
  return results;
}
|
|
2146
|
-
/**
 * Lint rule: reports every page whose frontmatter has `orphaned` set to
 * exactly `true`.
 *
 * @param {string} root - Project root passed to collectAllPages.
 * @returns {Promise<object[]>} One "orphaned-page" warning per such page.
 */
async function checkOrphanedPages(root) {
  const pages = await collectAllPages(root);
  return pages
    .filter((page) => parseFrontmatter(page.content).meta.orphaned === true)
    .map((page) => ({
      rule: "orphaned-page",
      severity: "warning",
      file: page.filePath,
      message: `Page is marked as orphaned`
    }));
}
|
|
2162
|
-
/**
 * Lint rule: reports pages whose frontmatter `summary` is falsy or a string
 * that is empty after trimming.
 *
 * @param {string} root - Project root passed to `collectAllPages`.
 * @returns {Promise<Array<object>>} One "missing-summary" warning per such page.
 */
async function checkMissingSummaries(root) {
  const findings = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { summary } = parseFrontmatter(content).meta;
    const isBlankString = typeof summary === "string" && summary.trim() === "";
    if (summary && !isBlankString) continue;
    findings.push({
      rule: "missing-summary",
      severity: "warning",
      file: filePath,
      message: `Page has no summary in frontmatter`
    });
  }
  return findings;
}
|
|
2180
|
-
/**
 * Lint rule: reports pages that share the same title, compared
 * case-insensitively and ignoring surrounding whitespace.
 *
 * Pages without a string title, or whose title is blank after trimming, are
 * skipped. This matches `checkEmptyPages`, which also treats a
 * whitespace-only title as "no title"; without the guard, all such pages
 * would be grouped under the empty key and reported as duplicates of each other.
 *
 * @param {string} root - Project root passed to `collectAllPages`.
 * @returns {Promise<Array<object>>} One "duplicate-concept" error per file in
 *   every group of two or more pages with the same normalized title; each
 *   message lists the other files in the group.
 */
async function checkDuplicateConcepts(root) {
  const pages = await collectAllPages(root);
  const filesByTitle = /* @__PURE__ */ new Map();
  for (const { filePath, content } of pages) {
    const { meta } = parseFrontmatter(content);
    if (typeof meta.title !== "string") continue;
    const normalizedTitle = meta.title.toLowerCase().trim();
    // A title that is empty after trimming is not a real title.
    if (normalizedTitle === "") continue;
    const bucket = filesByTitle.get(normalizedTitle);
    if (bucket) {
      bucket.push(filePath);
    } else {
      filesByTitle.set(normalizedTitle, [filePath]);
    }
  }
  const results = [];
  for (const [title, files] of filesByTitle) {
    if (files.length <= 1) continue;
    for (const file of files) {
      const others = files.filter((f) => f !== file);
      results.push({
        rule: "duplicate-concept",
        severity: "error",
        file,
        message: `Duplicate title "${title}" \u2014 also in ${others.join(", ")}`
      });
    }
  }
  return results;
}
|
|
2206
|
-
/**
 * Lint rule: reports titled pages whose trimmed body is shorter than
 * MIN_BODY_LENGTH characters.
 *
 * Pages without a non-blank string title are not reported by this rule.
 *
 * @param {string} root - Project root passed to `collectAllPages`.
 * @returns {Promise<Array<object>>} One "empty-page" warning per such page.
 */
async function checkEmptyPages(root) {
  const findings = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { meta, body } = parseFrontmatter(content);
    const titled = typeof meta.title === "string" && meta.title.trim() !== "";
    const tooShort = body.trim().length < MIN_BODY_LENGTH;
    if (!titled || !tooShort) continue;
    findings.push({
      rule: "empty-page",
      severity: "warning",
      file: filePath,
      message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
    });
  }
  return findings;
}
|
|
2224
|
-
/**
 * Lint rule: reports citations whose captured text, joined onto the sources
 * directory, does not name an existing path.
 *
 * @param {string} root - Project root; SOURCES_DIR is resolved relative to it.
 * @returns {Promise<Array<object>>} One "broken-citation" error per citation
 *   that does not resolve, with the file path and the line reported for the match.
 */
async function checkBrokenCitations(root) {
  const pages = await collectAllPages(root);
  const sourcesRoot = path16.join(root, SOURCES_DIR);
  const findings = [];
  for (const { filePath, content } of pages) {
    for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
      if (existsSync7(path16.join(sourcesRoot, captured))) continue;
      findings.push({
        rule: "broken-citation",
        severity: "error",
        file: filePath,
        message: `Broken citation ^[${captured}] \u2014 source file not found`,
        line
      });
    }
  }
  return findings;
}
|
|
2244
|
-
|
|
2245
3083
|
// src/linter/index.ts
|
|
2246
|
-
var
|
|
3084
|
+
var RULES_WITHOUT_SCHEMA = [
|
|
2247
3085
|
checkBrokenWikilinks,
|
|
2248
3086
|
checkOrphanedPages,
|
|
2249
3087
|
checkMissingSummaries,
|
|
2250
3088
|
checkDuplicateConcepts,
|
|
2251
3089
|
checkEmptyPages,
|
|
2252
|
-
checkBrokenCitations
|
|
3090
|
+
checkBrokenCitations,
|
|
3091
|
+
checkMalformedClaimCitations,
|
|
3092
|
+
checkLowConfidencePages,
|
|
3093
|
+
checkContradictedPages,
|
|
3094
|
+
checkInferredWithoutCitations
|
|
2253
3095
|
];
|
|
3096
|
+
var RULES_WITH_SCHEMA = [checkSchemaCrossLinks];
|
|
2254
3097
|
/**
 * Counts the lint results whose `severity` strictly equals the given value.
 *
 * @param {Array<{severity: string}>} results - Lint results to inspect.
 * @param {string} severity - Severity to count.
 * @returns {number} Number of matching results.
 */
function countBySeverity(results, severity) {
  let count = 0;
  for (const result of results) {
    if (result.severity === severity) count += 1;
  }
  return count;
}
|
|
2257
3100
|
async function lint(root) {
|
|
2258
|
-
const
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
|
|
3101
|
+
const schema = await loadSchema(root);
|
|
3102
|
+
const [plainResults, schemaResults] = await Promise.all([
|
|
3103
|
+
Promise.all(RULES_WITHOUT_SCHEMA.map((rule) => rule(root))),
|
|
3104
|
+
Promise.all(RULES_WITH_SCHEMA.map((rule) => rule(root, schema)))
|
|
3105
|
+
]);
|
|
3106
|
+
const results = [...plainResults.flat(), ...schemaResults.flat()];
|
|
2262
3107
|
return {
|
|
2263
3108
|
errors: countBySeverity(results, "error"),
|
|
2264
3109
|
warnings: countBySeverity(results, "warning"),
|
|
@@ -2286,6 +3131,9 @@ function printResult(result) {
|
|
|
2286
3131
|
}
|
|
2287
3132
|
async function lintCommand() {
|
|
2288
3133
|
header("Linting wiki");
|
|
3134
|
+
const schema = await loadSchema(process.cwd());
|
|
3135
|
+
const schemaSource = schema.loadedFrom ?? "defaults (no schema file)";
|
|
3136
|
+
status("i", dim(`Schema: ${schemaSource}`));
|
|
2289
3137
|
const summary = await lint(process.cwd());
|
|
2290
3138
|
for (const result of summary.results) {
|
|
2291
3139
|
printResult(result);
|
|
@@ -2302,12 +3150,170 @@ async function lintCommand() {
|
|
|
2302
3150
|
}
|
|
2303
3151
|
}
|
|
2304
3152
|
|
|
3153
|
+
// src/commands/schema.ts
|
|
3154
|
+
import { existsSync as existsSync10 } from "fs";
|
|
3155
|
+
import { mkdir as mkdir6, writeFile as writeFile5 } from "fs/promises";
|
|
3156
|
+
import path21 from "path";
|
|
3157
|
+
/**
 * `schema init`: writes the default schema as pretty-printed JSON to the path
 * returned by `defaultSchemaInitPath` for the current working directory.
 *
 * An existing file at that path is never overwritten; a warning is printed
 * instead. Missing parent directories are created.
 *
 * @returns {Promise<void>}
 */
async function schemaInitCommand() {
  const projectRoot = process.cwd();
  const defaults = buildDefaultSchema();
  const targetPath = defaultSchemaInitPath(projectRoot);
  if (existsSync10(targetPath)) {
    status("!", warn(`Schema file already exists at ${targetPath}`));
    return;
  }
  await mkdir6(path21.dirname(targetPath), { recursive: true });
  // Only these four fields of the default schema are written to disk.
  const payload = {
    version: defaults.version,
    defaultKind: defaults.defaultKind,
    kinds: defaults.kinds,
    seedPages: defaults.seedPages
  };
  const fileText = `${JSON.stringify(payload, null, 2)}\n`;
  await writeFile5(targetPath, fileText, "utf-8");
  status("+", success(`Wrote schema to ${targetPath}`));
}
|
|
3176
|
+
/**
 * `schema show`: prints the schema loaded for the current working directory
 * as YAML, under a header naming where it was loaded from.
 *
 * @returns {Promise<void>}
 */
async function schemaShowCommand() {
  const resolved = await loadSchema(process.cwd());
  // `loadedFrom` is nullish when no schema file was used.
  const origin = resolved.loadedFrom ?? "(defaults \u2014 no schema file found)";
  header(`Schema (${origin})`);
  const yamlText = serializeSchemaToYaml(resolved);
  console.log(yamlText);
}
|
|
3182
|
+
|
|
3183
|
+
// src/commands/review-list.ts
|
|
3184
|
+
/**
 * `review list`: prints one status line per pending review candidate (id,
 * slug, generation timestamp and sources), followed by a usage hint.
 *
 * @returns {Promise<void>}
 */
async function reviewListCommand() {
  header("Pending review candidates");
  const pending = await listCandidates(process.cwd());
  if (pending.length === 0) {
    status("\u2713", success("No pending candidates."));
    return;
  }
  for (const entry of pending) {
    const sourceList = entry.sources.join(", ");
    const details = dim(`${entry.generatedAt} | sources: ${sourceList}`);
    status("?", `${info(entry.id)} \u2192 ${entry.slug} ${details}`);
  }
  const hint = dim(`Use \`llmwiki review show <id>\` to inspect a candidate.`);
  status("\u2192", hint);
}
|
|
3201
|
+
|
|
3202
|
+
// src/commands/review-show.ts
|
|
3203
|
+
/**
 * `review show <id>`: prints a candidate's metadata, its body and, when
 * present, its recorded schema violations.
 *
 * Returns without printing when `loadCandidateOrFail` yields a falsy value.
 *
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function reviewShowCommand(id) {
  const candidate = await loadCandidateOrFail(process.cwd(), id);
  if (!candidate) return;

  const showField = (text) => status("i", dim(text));
  header(`Candidate ${candidate.id}`);
  showField(`title: ${candidate.title}`);
  showField(`slug: ${candidate.slug}`);
  showField(`summary: ${candidate.summary}`);
  showField(`sources: ${candidate.sources.join(", ")}`);
  showField(`generated: ${candidate.generatedAt}`);
  console.log();
  console.log(candidate.body);

  const violations = candidate.schemaViolations;
  if (violations && violations.length > 0) {
    console.log();
    header("Schema violations");
    for (const violation of violations) {
      status("!", warn(`[${violation.severity}] ${violation.message}`));
    }
  }
}
|
|
3222
|
+
|
|
3223
|
+
// src/commands/review-approve.ts
|
|
3224
|
+
import path22 from "path";
|
|
3225
|
+
|
|
3226
|
+
// src/commands/review-helpers.ts
|
|
3227
|
+
/**
 * Runs a review action for candidate `id` while holding the project lock.
 *
 * The candidate is looked up once before the lock is taken, so a failed
 * lookup returns without ever acquiring it. If the lock cannot be acquired,
 * an error is printed and `process.exitCode` is set to 1. The lock is
 * released even when `underLock` throws.
 *
 * @param {string} id - Candidate identifier.
 * @param {(root: string, id: string) => Promise<void>} underLock - Action to
 *   run once the lock is held.
 * @returns {Promise<void>}
 */
async function runReviewUnderLock(id, underLock) {
  const root = process.cwd();
  const found = await loadCandidateOrFail(root, id);
  if (!found) {
    return;
  }
  const gotLock = await acquireLock(root);
  if (!gotLock) {
    status("!", error("Could not acquire lock. Try again later."));
    process.exitCode = 1;
    return;
  }
  try {
    await underLock(root, id);
  } finally {
    await releaseLock(root);
  }
}
|
|
3243
|
+
|
|
3244
|
+
// src/commands/review-approve.ts
|
|
3245
|
+
/**
 * `review approve <id>`: runs `approveUnderLock` for the candidate through
 * `runReviewUnderLock`.
 *
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function reviewApproveCommand(id) {
  const action = approveUnderLock;
  await runReviewUnderLock(id, action);
}
|
|
3248
|
+
/**
 * Promotes a candidate to a concept page; the name suggests it is meant to
 * run with the project lock held.
 *
 * Steps, in order: reload the candidate, validate its body, write it to
 * `<root>/<CONCEPTS_DIR>/<slug>.md`, persist its source states, refresh the
 * derived wiki artifacts, then delete the candidate. A body that fails
 * validation sets `process.exitCode` to 1 and leaves everything untouched.
 *
 * @param {string} root - Project root.
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function approveUnderLock(root, id) {
  const approved = await loadCandidateUnderLockOrFail(root, id);
  if (!approved) return;

  const bodyIsValid = validateWikiPage(approved.body);
  if (!bodyIsValid) {
    status("!", error(`Candidate ${id} failed page validation; not approved.`));
    process.exitCode = 1;
    return;
  }

  const destination = path22.join(root, CONCEPTS_DIR, `${approved.slug}.md`);
  await atomicWrite(destination, approved.body);
  status("+", success(`Approved \u2192 ${source(destination)}`));

  await persistCandidateSourceStates(root, approved);
  await refreshWikiAfterApproval(root, approved.slug);
  // The candidate is removed only after the page and its follow-up updates are done.
  await deleteCandidate(root, id);
  status("\u2713", dim(`Candidate ${id} cleared.`));
}
|
|
3264
|
+
/**
 * Writes the candidate's recorded per-source state entries via
 * `updateSourceState`, skipping any source file that another pending
 * candidate also lists.
 *
 * Does nothing when the candidate carries no `sourceStates`.
 *
 * @param {string} root - Project root.
 * @param {object} candidate - Candidate with `id` and optional `sourceStates`
 *   (an object keyed by source file).
 * @returns {Promise<void>}
 */
async function persistCandidateSourceStates(root, candidate) {
  const { sourceStates } = candidate;
  if (!sourceStates) return;
  const claimedElsewhere = await collectOtherCandidateSources(root, candidate.id);
  for (const [sourceFile, entry] of Object.entries(sourceStates)) {
    if (!claimedElsewhere.has(sourceFile)) {
      // Updates run one at a time, in entry order.
      await updateSourceState(root, sourceFile, entry);
    }
  }
}
|
|
3273
|
+
/**
 * Collects the sources referenced by every pending candidate except the one
 * being approved.
 *
 * @param {string} root - Project root passed to `listCandidates`.
 * @param {string} approvingId - Id of the candidate to leave out.
 * @returns {Promise<Set<string>>} Union of the other candidates' `sources`.
 */
async function collectOtherCandidateSources(root, approvingId) {
  const claimed = /* @__PURE__ */ new Set();
  for (const { id, sources: candidateSources } of await listCandidates(root)) {
    if (id !== approvingId) {
      for (const sourceName of candidateSources) {
        claimed.add(sourceName);
      }
    }
  }
  return claimed;
}
|
|
3282
|
+
/**
 * Runs the post-approval refresh steps for one page, strictly in sequence:
 * link resolution, index generation, MOC generation, then a best-effort
 * embeddings update.
 *
 * @param {string} root - Project root.
 * @param {string} slug - Slug of the page that was just approved.
 * @returns {Promise<void>}
 */
async function refreshWikiAfterApproval(root, slug) {
  // resolveLinks receives two separate single-slug arrays, as in the original call.
  const firstSlugList = [slug];
  const secondSlugList = [slug];
  await resolveLinks(root, firstSlugList, secondSlugList);
  await generateIndex(root);
  await generateMOC(root);
  await safelyUpdateEmbeddings2(root, [slug]);
}
|
|
3288
|
+
/**
 * Updates embeddings for the given slugs without letting a failure propagate:
 * any error is downgraded to a printed warning.
 *
 * @param {string} root - Project root.
 * @param {string[]} slugs - Slugs passed on to `updateEmbeddings`.
 * @returns {Promise<void>}
 */
async function safelyUpdateEmbeddings2(root, slugs) {
  try {
    await updateEmbeddings(root, slugs);
  } catch (err) {
    let reason;
    if (err instanceof Error) {
      reason = err.message;
    } else {
      reason = String(err);
    }
    status("!", warn(`Skipped embeddings update: ${reason}`));
  }
}
|
|
3296
|
+
|
|
3297
|
+
// src/commands/review-reject.ts
|
|
3298
|
+
/**
 * `review reject <id>`: runs `rejectUnderLock` for the candidate through
 * `runReviewUnderLock`.
 *
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function reviewRejectCommand(id) {
  const action = rejectUnderLock;
  await runReviewUnderLock(id, action);
}
|
|
3301
|
+
/**
 * Rejects a candidate: reloads it, archives it via `archiveCandidate`, and
 * prints a confirmation. The name suggests it is meant to run with the
 * project lock held.
 *
 * @param {string} root - Project root.
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function rejectUnderLock(root, id) {
  const rejected = await loadCandidateUnderLockOrFail(root, id);
  if (!rejected) {
    return;
  }
  await archiveCandidate(root, id);
  const notice = warn(`Rejected candidate ${id} (${rejected.slug}) \u2014 archived, wiki unchanged.`);
  status("-", notice);
}
|
|
3310
|
+
|
|
2305
3311
|
// src/mcp/server.ts
|
|
2306
3312
|
import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2307
3313
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
2308
3314
|
|
|
2309
3315
|
// src/mcp/tools.ts
|
|
2310
|
-
import
|
|
3316
|
+
import path23 from "path";
|
|
2311
3317
|
import { z } from "zod";
|
|
2312
3318
|
|
|
2313
3319
|
// src/mcp/provider-check.ts
|
|
@@ -2437,7 +3443,7 @@ async function pickSearchSlugs(root, question) {
|
|
|
2437
3443
|
if (candidates.length > 0) return candidates.map((c) => c.slug);
|
|
2438
3444
|
} catch {
|
|
2439
3445
|
}
|
|
2440
|
-
const indexContent = await safeReadFile(
|
|
3446
|
+
const indexContent = await safeReadFile(path23.join(root, INDEX_FILE));
|
|
2441
3447
|
const { pages } = await selectPages(question, indexContent);
|
|
2442
3448
|
return pages;
|
|
2443
3449
|
}
|
|
@@ -2486,11 +3492,12 @@ function registerStatusTool(server, root) {
|
|
|
2486
3492
|
);
|
|
2487
3493
|
}
|
|
2488
3494
|
async function collectStatus(root) {
|
|
2489
|
-
const concepts = await collectPageSummaries(
|
|
2490
|
-
const queries = await collectPageSummaries(
|
|
3495
|
+
const concepts = await collectPageSummaries(path23.join(root, CONCEPTS_DIR));
|
|
3496
|
+
const queries = await collectPageSummaries(path23.join(root, QUERIES_DIR));
|
|
2491
3497
|
const state = await readState(root);
|
|
2492
3498
|
const changes = await detectChanges(root, state);
|
|
2493
3499
|
const orphans = await findOrphanedSlugs(root);
|
|
3500
|
+
const pendingCandidates = await countCandidates(root);
|
|
2494
3501
|
const compileTimes = Object.values(state.sources).map((s) => s.compiledAt);
|
|
2495
3502
|
const lastCompile = compileTimes.length > 0 ? compileTimes.sort().slice(-1)[0] : null;
|
|
2496
3503
|
return {
|
|
@@ -2498,11 +3505,12 @@ async function collectStatus(root) {
|
|
|
2498
3505
|
sources: Object.keys(state.sources).length,
|
|
2499
3506
|
lastCompiledAt: lastCompile,
|
|
2500
3507
|
orphanedPages: orphans,
|
|
3508
|
+
pendingCandidates,
|
|
2501
3509
|
pendingChanges: changes.filter((c) => c.status !== "unchanged").map((c) => ({ file: c.file, status: c.status }))
|
|
2502
3510
|
};
|
|
2503
3511
|
}
|
|
2504
3512
|
async function findOrphanedSlugs(root) {
|
|
2505
|
-
const scanned = await scanWikiPages(
|
|
3513
|
+
const scanned = await scanWikiPages(path23.join(root, CONCEPTS_DIR));
|
|
2506
3514
|
return scanned.filter(({ meta }) => meta.orphaned).map(({ slug }) => slug);
|
|
2507
3515
|
}
|
|
2508
3516
|
async function loadPageRecords(root, slugs) {
|
|
@@ -2515,7 +3523,7 @@ async function loadPageRecords(root, slugs) {
|
|
|
2515
3523
|
}
|
|
2516
3524
|
async function readPage(root, slug) {
|
|
2517
3525
|
for (const dir of PAGE_DIRS2) {
|
|
2518
|
-
const content = await safeReadFile(
|
|
3526
|
+
const content = await safeReadFile(path23.join(root, dir, `${slug}.md`));
|
|
2519
3527
|
if (!content) continue;
|
|
2520
3528
|
const { meta, body } = parseFrontmatter(content);
|
|
2521
3529
|
if (meta.orphaned) continue;
|
|
@@ -2530,8 +3538,8 @@ async function readPage(root, slug) {
|
|
|
2530
3538
|
}
|
|
2531
3539
|
|
|
2532
3540
|
// src/mcp/resources.ts
|
|
2533
|
-
import
|
|
2534
|
-
import { readdir as
|
|
3541
|
+
import path24 from "path";
|
|
3542
|
+
import { readdir as readdir9 } from "fs/promises";
|
|
2535
3543
|
import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2536
3544
|
function jsonContent(uri, payload) {
|
|
2537
3545
|
return {
|
|
@@ -2564,7 +3572,7 @@ function registerIndexResource(server, root) {
|
|
|
2564
3572
|
mimeType: "text/markdown"
|
|
2565
3573
|
},
|
|
2566
3574
|
async (uri) => {
|
|
2567
|
-
const content = await safeReadFile(
|
|
3575
|
+
const content = await safeReadFile(path24.join(root, INDEX_FILE));
|
|
2568
3576
|
return { contents: [markdownContent(uri, content)] };
|
|
2569
3577
|
}
|
|
2570
3578
|
);
|
|
@@ -2631,23 +3639,23 @@ function registerQueryResource(server, root) {
|
|
|
2631
3639
|
);
|
|
2632
3640
|
}
|
|
2633
3641
|
async function listSources(root) {
|
|
2634
|
-
const sourcesPath =
|
|
3642
|
+
const sourcesPath = path24.join(root, SOURCES_DIR);
|
|
2635
3643
|
let files;
|
|
2636
3644
|
try {
|
|
2637
|
-
files = await
|
|
3645
|
+
files = await readdir9(sourcesPath);
|
|
2638
3646
|
} catch {
|
|
2639
3647
|
return [];
|
|
2640
3648
|
}
|
|
2641
3649
|
const records = [];
|
|
2642
3650
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
2643
|
-
const content = await safeReadFile(
|
|
3651
|
+
const content = await safeReadFile(path24.join(sourcesPath, file));
|
|
2644
3652
|
const { meta } = parseFrontmatter(content);
|
|
2645
3653
|
records.push({ filename: file, ...meta });
|
|
2646
3654
|
}
|
|
2647
3655
|
return records;
|
|
2648
3656
|
}
|
|
2649
3657
|
async function loadPageWithMeta(root, dir, slug) {
|
|
2650
|
-
const filePath =
|
|
3658
|
+
const filePath = path24.join(root, dir, `${slug}.md`);
|
|
2651
3659
|
const content = await safeReadFile(filePath);
|
|
2652
3660
|
if (!content) {
|
|
2653
3661
|
throw new Error(`Page not found: ${dir}/${slug}.md`);
|
|
@@ -2656,10 +3664,10 @@ async function loadPageWithMeta(root, dir, slug) {
|
|
|
2656
3664
|
return { slug, meta, body: body.trim() };
|
|
2657
3665
|
}
|
|
2658
3666
|
async function listPagesUnder(root, dir, scheme) {
|
|
2659
|
-
const pagesPath =
|
|
3667
|
+
const pagesPath = path24.join(root, dir);
|
|
2660
3668
|
let files;
|
|
2661
3669
|
try {
|
|
2662
|
-
files = await
|
|
3670
|
+
files = await readdir9(pagesPath);
|
|
2663
3671
|
} catch {
|
|
2664
3672
|
return { resources: [] };
|
|
2665
3673
|
}
|
|
@@ -2695,10 +3703,46 @@ program.command("ingest <source>").description("Ingest a URL or local file into
|
|
|
2695
3703
|
process.exit(1);
|
|
2696
3704
|
}
|
|
2697
3705
|
});
|
|
2698
|
-
program.command("compile").description("Compile sources/ into an interlinked wiki").
|
|
3706
|
+
program.command("compile").description("Compile sources/ into an interlinked wiki").option(
|
|
3707
|
+
"--review",
|
|
3708
|
+
"Write generated pages as review candidates under .llmwiki/candidates/ instead of mutating wiki/. Orphan-marking for deleted sources is deferred until the next non-review compile."
|
|
3709
|
+
).action(async (options) => {
|
|
2699
3710
|
try {
|
|
2700
3711
|
requireProvider();
|
|
2701
|
-
await compileCommand();
|
|
3712
|
+
await compileCommand({ review: options.review });
|
|
3713
|
+
} catch (err) {
|
|
3714
|
+
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
3715
|
+
process.exit(1);
|
|
3716
|
+
}
|
|
3717
|
+
});
|
|
3718
|
+
// `review` parent command; the subcommands below are registered on it.
// Each action prints a red "Error:" line and exits with status 1 on failure.
var reviewCommand = program
  .command("review")
  .description("Inspect and act on pending compile review candidates");

// review list
reviewCommand
  .command("list")
  .description("List pending review candidates")
  .action(async () => {
    try {
      await reviewListCommand();
    } catch (err) {
      console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
      process.exit(1);
    }
  });

// review show <id>
reviewCommand
  .command("show <id>")
  .description("Print a single candidate's metadata and body")
  .action(async (id) => {
    try {
      await reviewShowCommand(id);
    } catch (err) {
      console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
      process.exit(1);
    }
  });

// review approve <id>
reviewCommand
  .command("approve <id>")
  .description("Approve a candidate and promote it into wiki/concepts/")
  .action(async (id) => {
    try {
      await reviewApproveCommand(id);
    } catch (err) {
      console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
      process.exit(1);
    }
  });
|
|
3743
|
+
reviewCommand.command("reject <id>").description("Reject a candidate and archive it without touching wiki/").action(async (id) => {
|
|
3744
|
+
try {
|
|
3745
|
+
await reviewRejectCommand(id);
|
|
2702
3746
|
} catch (err) {
|
|
2703
3747
|
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
2704
3748
|
process.exit(1);
|
|
@@ -2730,6 +3774,23 @@ program.command("lint").description("Run rule-based quality checks against the w
|
|
|
2730
3774
|
process.exit(1);
|
|
2731
3775
|
}
|
|
2732
3776
|
});
|
|
3777
|
+
// `schema` parent command; the subcommands below are registered on it.
// Each action prints a red "Error:" line and exits with status 1 on failure.
var schemaCmd = program
  .command("schema")
  .description("Inspect or initialize the project's wiki schema config");

// schema init
schemaCmd
  .command("init")
  .description("Write a starter schema file to .llmwiki/schema.json")
  .action(async () => {
    try {
      await schemaInitCommand();
    } catch (err) {
      console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
      process.exit(1);
    }
  });

// schema show
schemaCmd
  .command("show")
  .description("Print the resolved schema for this project")
  .action(async () => {
    try {
      await schemaShowCommand();
    } catch (err) {
      console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
      process.exit(1);
    }
  });
|
|
2733
3794
|
program.command("serve").description("Start an MCP server exposing wiki tools and resources over stdio").option("--root <dir>", "Project root directory", process.cwd()).action(async (options) => {
|
|
2734
3795
|
try {
|
|
2735
3796
|
await startMCPServer({ root: options.root, version });
|