llm-wiki-compiler 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +222 -13
- package/dist/cli.js +2309 -311
- package/dist/cli.js.map +1 -1
- package/package.json +9 -3
package/dist/cli.js
CHANGED
|
@@ -13,6 +13,12 @@ import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
|
|
|
13
13
|
import { writeFile, rename, readFile, mkdir } from "fs/promises";
|
|
14
14
|
import path from "path";
|
|
15
15
|
import yaml from "js-yaml";
|
|
16
|
+
// Provenance labels accepted by parseProvenanceState(); anything else is dropped.
var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set(["extracted", "merged", "inferred", "ambiguous"]);
|
|
16
22
|
/**
 * Turn a title into a lowercase, hyphen-separated slug.
 *
 * @param {string} title - Human-readable title.
 * @returns {string} Slug containing only word characters and single hyphens.
 */
function slugify(title) {
  const lowered = title.toLowerCase();
  // Apostrophes are removed first so "it's" collapses to "its", not "it-s".
  const withoutApostrophes = lowered.replace(/['']/g, "");
  const wordCharsOnly = withoutApostrophes.replace(/[^\w\s-]/g, "");
  const hyphenated = wordCharsOnly.replace(/\s+/g, "-").replace(/-+/g, "-");
  // Drop one leading and one trailing hyphen left over from edge whitespace.
  return hyphenated.replace(/^-|-$/g, "");
}
|
|
@@ -50,6 +56,46 @@ async function safeReadFile(filePath) {
|
|
|
50
56
|
return "";
|
|
51
57
|
}
|
|
52
58
|
}
|
|
59
|
+
/**
 * Validate a confidence value and clamp it into the 0..1 range.
 *
 * @param {unknown} raw - Value read from metadata.
 * @returns {number|undefined} Clamped number, or undefined when raw is not a finite number.
 */
function parseConfidence(raw) {
  const isUsable = typeof raw === "number" && Number.isFinite(raw);
  if (!isUsable) {
    return undefined;
  }
  if (raw < 0) {
    return 0;
  }
  return raw > 1 ? 1 : raw;
}
|
|
65
|
+
/**
 * Accept a provenance state only when it is one of VALID_PROVENANCE_STATES.
 *
 * @param {unknown} raw - Value read from metadata.
 * @returns {string|undefined} The state unchanged, or undefined when it is not a recognised string.
 */
function parseProvenanceState(raw) {
  const isKnownState = typeof raw === "string" && VALID_PROVENANCE_STATES.has(raw);
  return isKnownState ? raw : undefined;
}
|
|
69
|
+
/**
 * Normalise one `contradictedBy` entry into `{ slug, reason? }`.
 *
 * Accepts either a bare slug string or an object carrying a `slug` key.
 *
 * @param {unknown} entry - Raw entry from metadata.
 * @returns {{slug: string, reason?: string}|null} Normalised reference, or null when the entry is unusable.
 */
function coerceContradictionEntry(entry) {
  if (typeof entry === "string") {
    const bareSlug = entry.trim();
    return bareSlug.length > 0 ? { slug: bareSlug } : null;
  }
  const looksLikeRef = entry !== null && typeof entry === "object" && "slug" in entry;
  if (!looksLikeRef) {
    return null;
  }
  if (typeof entry.slug !== "string") {
    return null;
  }
  const slug = entry.slug.trim();
  if (slug.length === 0) {
    return null;
  }
  // A reason is carried over only when it is a string; other types are dropped.
  return typeof entry.reason === "string" ? { slug, reason: entry.reason } : { slug };
}
|
|
82
|
+
/**
 * Parse a `contradictedBy` list, keeping only entries that coerce successfully.
 *
 * @param {unknown} raw - Value read from metadata.
 * @returns {Array<{slug: string, reason?: string}>|undefined} Non-empty list of references, or undefined.
 */
function parseContradictedBy(raw) {
  if (!Array.isArray(raw)) {
    return undefined;
  }
  const accepted = [];
  raw.forEach((entry, index, all) => {
    const ref = coerceContradictionEntry(entry, index, all);
    if (ref !== null) {
      accepted.push(ref);
    }
  });
  // An empty result is reported as "absent" rather than as an empty array.
  return accepted.length > 0 ? accepted : undefined;
}
|
|
87
|
+
/**
 * Accept an inferred-paragraph count only when it is a non-negative integer.
 *
 * @param {unknown} raw - Value read from metadata.
 * @returns {number|undefined} The count, or undefined when invalid.
 */
function parseInferredParagraphs(raw) {
  const isCount = typeof raw === "number" && Number.isInteger(raw) && !(raw < 0);
  return isCount ? raw : undefined;
}
|
|
91
|
+
/**
 * Extract the validated provenance fields from a metadata object.
 *
 * @param {object} meta - Metadata object (must not be null/undefined).
 * @returns {{confidence: (number|undefined), provenanceState: (string|undefined), contradictedBy: (Array|undefined), inferredParagraphs: (number|undefined)}}
 *   Each field is undefined when the corresponding raw value fails validation.
 */
function parseProvenanceMetadata(meta) {
  const { confidence, provenanceState, contradictedBy, inferredParagraphs } = meta;
  const parsed = {};
  parsed.confidence = parseConfidence(confidence);
  parsed.provenanceState = parseProvenanceState(provenanceState);
  parsed.contradictedBy = parseContradictedBy(contradictedBy);
  parsed.inferredParagraphs = parseInferredParagraphs(inferredParagraphs);
  return parsed;
}
|
|
53
99
|
function validateWikiPage(content) {
|
|
54
100
|
if (!content || content.trim().length === 0) return false;
|
|
55
101
|
const { meta, body } = parseFrontmatter(content);
|
|
@@ -66,7 +112,14 @@ var COMPILE_CONCURRENCY = 5;
|
|
|
66
112
|
var RETRY_COUNT = 3;
|
|
67
113
|
var RETRY_BASE_MS = 1e3;
|
|
68
114
|
var RETRY_MULTIPLIER = 4;
|
|
69
|
-
var
|
|
115
|
+
var DEFAULT_PROVIDER = "anthropic";
|
|
116
|
+
var PROVIDER_MODELS = {
|
|
117
|
+
anthropic: "claude-sonnet-4-20250514",
|
|
118
|
+
openai: "gpt-4o",
|
|
119
|
+
ollama: "llama3.1",
|
|
120
|
+
minimax: "MiniMax-M2.7"
|
|
121
|
+
};
|
|
122
|
+
var OLLAMA_DEFAULT_HOST = "http://localhost:11434/v1";
|
|
70
123
|
var SOURCES_DIR = "sources";
|
|
71
124
|
var CONCEPTS_DIR = "wiki/concepts";
|
|
72
125
|
var QUERIES_DIR = "wiki/queries";
|
|
@@ -74,6 +127,18 @@ var LLMWIKI_DIR = ".llmwiki";
|
|
|
74
127
|
var STATE_FILE = ".llmwiki/state.json";
|
|
75
128
|
var LOCK_FILE = ".llmwiki/lock";
|
|
76
129
|
var INDEX_FILE = "wiki/index.md";
|
|
130
|
+
var MOC_FILE = "wiki/MOC.md";
|
|
131
|
+
var EMBEDDINGS_FILE = ".llmwiki/embeddings.json";
|
|
132
|
+
var CANDIDATES_DIR = ".llmwiki/candidates";
|
|
133
|
+
var CANDIDATES_ARCHIVE_DIR = ".llmwiki/candidates/archive";
|
|
134
|
+
var EMBEDDING_TOP_K = 15;
|
|
135
|
+
var LOW_CONFIDENCE_THRESHOLD = 0.5;
|
|
136
|
+
var MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS = 2;
|
|
137
|
+
var EMBEDDING_MODELS = {
|
|
138
|
+
anthropic: "voyage-3-lite",
|
|
139
|
+
openai: "text-embedding-3-small",
|
|
140
|
+
ollama: "nomic-embed-text"
|
|
141
|
+
};
|
|
77
142
|
|
|
78
143
|
// src/utils/output.ts
|
|
79
144
|
var RESET = "\x1B[0m";
|
|
@@ -234,26 +299,36 @@ async function saveSource(title, document) {
|
|
|
234
299
|
await writeFile2(destPath, document, "utf-8");
|
|
235
300
|
return destPath;
|
|
236
301
|
}
|
|
237
|
-
async function
|
|
302
|
+
/**
 * Fetch a source (URL or local file), apply the length checks, and save it.
 *
 * @param {string} source2 - URL or file path to ingest.
 * @returns {Promise<{filename: string, charCount: number, truncated: boolean, source: string}>}
 *   Summary of the saved document.
 */
async function ingestSource(source2) {
  status("*", info(`Ingesting: ${source2}`));
  const fetched = isUrl(source2) ? await ingestWeb(source2) : await ingestFile(source2);
  const limited = enforceCharLimit(fetched.content);
  enforceMinContent(limited.content);
  const document = buildDocument(fetched.title, source2, limited);
  const savedPath = await saveSource(fetched.title, document);
  const filename = path3.basename(savedPath);
  return {
    filename,
    charCount: limited.content.length,
    truncated: limited.truncated,
    source: source2
  };
}
|
|
316
|
+
/**
 * CLI entry point for ingestion: ingest the source, then print where it was
 * saved and the suggested next command.
 *
 * @param {string} source2 - URL or file path to ingest.
 * @returns {Promise<void>}
 */
async function ingest(source2) {
  const { filename } = await ingestSource(source2);
  const savedPath = path3.join(SOURCES_DIR, filename);
  const savedMessage = success(`Saved ${bold(filename)} \u2192 ${source(savedPath)}`);
  status("+", savedMessage);
  status("\u2192", dim("Next: llmwiki compile"));
}
|
|
250
325
|
|
|
251
326
|
// src/commands/compile.ts
|
|
252
|
-
import { existsSync as
|
|
327
|
+
import { existsSync as existsSync5 } from "fs";
|
|
253
328
|
|
|
254
329
|
// src/compiler/index.ts
|
|
255
|
-
import { readFile as
|
|
256
|
-
import
|
|
330
|
+
import { readFile as readFile8 } from "fs/promises";
|
|
331
|
+
import path16 from "path";
|
|
257
332
|
|
|
258
333
|
// src/utils/state.ts
|
|
259
334
|
import { readFile as readFile3, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
|
|
@@ -296,30 +371,477 @@ async function removeSourceState(root, sourceFile) {
|
|
|
296
371
|
await writeState(root, state);
|
|
297
372
|
}
|
|
298
373
|
|
|
299
|
-
// src/
|
|
374
|
+
// src/compiler/source-state.ts
|
|
375
|
+
import path6 from "path";
|
|
376
|
+
|
|
377
|
+
// src/compiler/hasher.ts
|
|
378
|
+
import { createHash } from "crypto";
|
|
379
|
+
import { readFile as readFile4, readdir } from "fs/promises";
|
|
380
|
+
import path5 from "path";
|
|
381
|
+
/**
 * Compute the SHA-256 hex digest of a file read as UTF-8 text.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Lowercase hex digest.
 */
async function hashFile(filePath) {
  const text = await readFile4(filePath, "utf-8");
  const hasher = createHash("sha256");
  hasher.update(text);
  return hasher.digest("hex");
}
|
|
385
|
+
/**
 * Compare the sources directory against the previous state.
 *
 * @param {string} root - Project root directory.
 * @param {{sources: object}} prevState - Previously persisted state.
 * @returns {Promise<Array<{file: string, status: string}>>} One entry per file
 *   currently present, followed by one "deleted" entry per vanished file.
 */
async function detectChanges(root, prevState) {
  const sourcesDir = path5.join(root, SOURCES_DIR);
  const presentFiles = await listSourceFiles(sourcesDir);
  const classified = [];
  // Files are classified one at a time, in listing order.
  for (const file of presentFiles) {
    classified.push({ file, status: await classifyFile(root, file, prevState) });
  }
  return classified.concat(findDeletedFiles(presentFiles, prevState));
}
|
|
397
|
+
/**
 * List the markdown files directly inside the sources directory.
 *
 * A missing directory (or a path that is not a directory) is reported as "no
 * sources". Every other failure is rethrown: treating a permission or I/O
 * error as an empty listing would make every tracked source look deleted to
 * the caller.
 *
 * @param {string} sourcesPath - Absolute path of the sources directory.
 * @returns {Promise<string[]>} File names ending in ".md".
 * @throws Any readdir error other than ENOENT / ENOTDIR.
 */
async function listSourceFiles(sourcesPath) {
  let entries;
  try {
    entries = await readdir(sourcesPath);
  } catch (err) {
    const code = err?.code;
    if (code === "ENOENT" || code === "ENOTDIR") {
      return [];
    }
    throw err;
  }
  return entries.filter((name) => name.endsWith(".md"));
}
|
|
405
|
+
/**
 * Classify one source file against the previous state by comparing hashes.
 *
 * @param {string} root - Project root directory.
 * @param {string} file - File name inside the sources directory.
 * @param {{sources: object}} prevState - Previously persisted state.
 * @returns {Promise<"new"|"changed"|"unchanged">}
 */
async function classifyFile(root, file, prevState) {
  const currentHash = await hashFile(path5.join(root, SOURCES_DIR, file));
  const recorded = prevState.sources[file];
  if (!recorded) {
    return "new";
  }
  return recorded.hash === currentHash ? "unchanged" : "changed";
}
|
|
413
|
+
/**
 * Find files recorded in the previous state that are no longer present.
 *
 * @param {Iterable<string>} currentFiles - File names currently on disk.
 * @param {{sources: object}} prevState - Previously persisted state.
 * @returns {Array<{file: string, status: "deleted"}>}
 */
function findDeletedFiles(currentFiles, prevState) {
  const stillPresent = new Set(currentFiles);
  const deleted = [];
  for (const file of Object.keys(prevState.sources)) {
    if (stillPresent.has(file)) {
      continue;
    }
    deleted.push({ file, status: "deleted" });
  }
  return deleted;
}
|
|
417
|
+
|
|
418
|
+
// src/compiler/source-state.ts
|
|
419
|
+
/**
 * Build a per-source state snapshot for every extraction that produced at
 * least one concept. All entries share a single `compiledAt` timestamp.
 *
 * @param {string} root - Project root directory.
 * @param {Iterable<{sourceFile: string, concepts: Array}>} extractions - Extraction results.
 * @returns {Promise<object>} Map of source file name to state entry.
 */
async function buildExtractionSourceStates(root, extractions) {
  const compiledAt = (/* @__PURE__ */ new Date()).toISOString();
  const snapshot = {};
  for (const extraction of extractions) {
    // Sources that yielded no concepts get no entry.
    if (extraction.concepts.length !== 0) {
      snapshot[extraction.sourceFile] = await buildEntry(root, extraction, compiledAt);
    }
  }
  return snapshot;
}
|
|
428
|
+
/**
 * Build the state entry for one extracted source: its content hash, the
 * slugs of its concepts, and the compile timestamp.
 *
 * @param {string} root - Project root directory.
 * @param {{sourceFile: string, concepts: Array<{concept: string}>}} result - Extraction result.
 * @param {string} compiledAt - ISO timestamp to record.
 * @returns {Promise<{hash: string, concepts: string[], compiledAt: string}>}
 */
async function buildEntry(root, result, compiledAt) {
  const sourcePath = path6.join(root, SOURCES_DIR, result.sourceFile);
  const hash = await hashFile(sourcePath);
  const conceptSlugs = result.concepts.map((item) => slugify(item.concept));
  return { hash, concepts: conceptSlugs, compiledAt };
}
|
|
437
|
+
/**
 * Select the state entries belonging to the given source files.
 *
 * @param {object} allStates - Map of source file name to state entry.
 * @param {Iterable<string>} sourceFiles - Names to pick.
 * @returns {object} New object with the truthy entries found for those names.
 */
function pickStatesForSources(allStates, sourceFiles) {
  return [...sourceFiles].reduce((picked, file) => {
    const entry = allStates[file];
    if (entry) {
      picked[file] = entry;
    }
    return picked;
  }, {});
}
|
|
445
|
+
|
|
446
|
+
// src/providers/anthropic.ts
|
|
300
447
|
import Anthropic from "@anthropic-ai/sdk";
|
|
301
|
-
var
|
|
302
|
-
function
|
|
303
|
-
|
|
304
|
-
|
|
448
|
+
var VOYAGE_EMBEDDINGS_URL = "https://api.voyageai.com/v1/embeddings";
|
|
449
|
+
/**
 * Build the Anthropic client constructor options from loosely-typed input.
 *
 * Values are trimmed, blank values are omitted, and one trailing slash is
 * removed from the base URL (unless the URL is just "/").
 *
 * @param {{baseURL?: string, apiKey?: string, authToken?: string}} [options]
 * @returns {{apiKey?: string, authToken?: string, baseURL?: string}}
 */
function buildAnthropicClientOptions(options = {}) {
  const baseURL = options.baseURL?.trim();
  const apiKey = options.apiKey?.trim();
  const authToken = options.authToken?.trim();
  const clientOptions = {
    ...(apiKey ? { apiKey } : {}),
    ...(authToken ? { authToken } : {})
  };
  if (baseURL) {
    const hasRemovableSlash = baseURL.length > 1 && baseURL.endsWith("/");
    clientOptions.baseURL = hasRemovableSlash ? baseURL.slice(0, -1) : baseURL;
  }
  return clientOptions;
}
|
|
467
|
+
/**
 * Provider backed by the Anthropic SDK client for completions and tool calls,
 * with embeddings requested from the Voyage HTTP endpoint.
 */
var AnthropicProvider = class {
  client;
  model;
  /**
   * @param {string} model - Model identifier sent with every request.
   * @param {{baseURL?: string, apiKey?: string, authToken?: string}} [options] - Client overrides.
   */
  constructor(model, options = {}) {
    this.model = model;
    const clientOptions = buildAnthropicClientOptions(options);
    this.client = new Anthropic(clientOptions);
  }
  /** Send one non-streaming request and return the first text block ("" if none). */
  async complete(system, messages, maxTokens) {
    const request = { model: this.model, max_tokens: maxTokens, system, messages };
    const response = await this.client.messages.create(request);
    const firstText = response.content.find((block) => block.type === "text");
    if (!firstText) {
      return "";
    }
    return firstText.text;
  }
  /** Stream a request, forwarding each text delta to onToken, and return the full text. */
  async stream(system, messages, maxTokens, onToken) {
    const request = { model: this.model, max_tokens: maxTokens, system, messages };
    const events = this.client.messages.stream(request);
    let collected = "";
    for await (const event of events) {
      const isTextDelta = event.type === "content_block_delta" && event.delta.type === "text_delta";
      if (!isTextDelta) {
        continue;
      }
      collected += event.delta.text;
      onToken?.(event.delta.text);
    }
    return collected;
  }
  /**
   * Send a request with tool definitions. Returns the first tool_use input
   * serialised as JSON, else the first text block, else "".
   */
  async toolCall(system, messages, tools, maxTokens) {
    const anthropicTools = tools.map(({ name, description, input_schema }) => ({
      name,
      description,
      input_schema
    }));
    const response = await this.client.messages.create({
      model: this.model,
      max_tokens: maxTokens,
      system,
      messages,
      tools: anthropicTools
    });
    const toolUse = response.content.find((block) => block.type === "tool_use");
    if (toolUse) {
      return JSON.stringify(toolUse.input);
    }
    const firstText = response.content.find((block) => block.type === "text");
    return firstText ? firstText.text : "";
  }
  /**
   * Produce a single embedding vector through the Voyage embeddings endpoint.
   *
   * Reads the key from VOYAGE_API_KEY and throws when it is unset or blank,
   * when the HTTP response is not ok, or when the response has no vector.
   */
  async embed(text) {
    const apiKey = process.env.VOYAGE_API_KEY?.trim();
    if (!apiKey) {
      throw new Error(
        "VOYAGE_API_KEY is not set. Anthropic embeddings use Voyage \u2014 set VOYAGE_API_KEY to enable semantic search."
      );
    }
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    };
    const body = JSON.stringify({ input: text, model: EMBEDDING_MODELS.anthropic });
    const response = await fetch(VOYAGE_EMBEDDINGS_URL, { method: "POST", headers, body });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`Voyage embeddings request failed (${response.status}): ${detail}`);
    }
    const payload = await response.json();
    const vector = payload.data?.[0]?.embedding;
    if (Array.isArray(vector)) {
      return vector;
    }
    throw new Error("Voyage embeddings response did not include a vector.");
  }
};
|
|
556
|
+
|
|
557
|
+
// src/providers/openai.ts
|
|
558
|
+
import OpenAI from "openai";
|
|
559
|
+
/**
 * Convert a tool definition with `name` / `description` / `input_schema`
 * into the OpenAI function-tool shape.
 *
 * @param {{name: string, description: string, input_schema: object}} tool
 * @returns {{type: "function", function: {name: string, description: string, parameters: object}}}
 */
function translateToolToOpenAI(tool) {
  const { name, description, input_schema: parameters } = tool;
  const fn = { name, description, parameters };
  return { type: "function", function: fn };
}
|
|
569
|
+
/**
 * Provider backed by the OpenAI SDK client. Completions and embeddings may
 * use different base URLs; subclasses reuse it for OpenAI-compatible servers.
 */
var OpenAIProvider = class {
  client;
  embeddingsClient;
  model;
  configuredEmbeddingModel;
  /**
   * @param {string} model - Chat model identifier.
   * @param {{apiKey?: string, baseURL?: string, embeddingsBaseURL?: string, embeddingModel?: string}} [options]
   *   apiKey falls back to OPENAI_API_KEY, then to "".
   */
  constructor(model, options = {}) {
    const { apiKey, baseURL, embeddingsBaseURL, embeddingModel } = options;
    this.model = model;
    this.configuredEmbeddingModel = embeddingModel;
    const resolvedKey = apiKey ?? process.env.OPENAI_API_KEY ?? "";
    this.client = new OpenAI({
      apiKey: resolvedKey,
      baseURL: baseURL ?? null
    });
    // A separate client is created only when a dedicated embeddings URL is given.
    if (embeddingsBaseURL) {
      this.embeddingsClient = new OpenAI({ apiKey: resolvedKey, baseURL: embeddingsBaseURL });
    } else {
      this.embeddingsClient = this.client;
    }
  }
  /** Send one non-streaming chat completion and return its text ("" if absent). */
  async complete(system, messages, maxTokens) {
    const conversation = [{ role: "system", content: system }, ...messages];
    const response = await this.client.chat.completions.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: conversation
    });
    const [firstChoice] = response.choices;
    return firstChoice?.message?.content ?? "";
  }
  /** Stream a chat completion, forwarding each non-empty delta to onToken. */
  async stream(system, messages, maxTokens, onToken) {
    const conversation = [{ role: "system", content: system }, ...messages];
    const chunks = await this.client.chat.completions.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: conversation,
      stream: true
    });
    let collected = "";
    for await (const chunk of chunks) {
      const piece = chunk.choices[0]?.delta?.content;
      if (!piece) {
        continue;
      }
      collected += piece;
      onToken?.(piece);
    }
    return collected;
  }
  /**
   * Send a chat completion with tool definitions. Returns the first tool
   * call's argument string, else the message text, else "".
   */
  async toolCall(system, messages, tools, maxTokens) {
    const conversation = [{ role: "system", content: system }, ...messages];
    const response = await this.client.chat.completions.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: conversation,
      tools: tools.map(translateToolToOpenAI)
    });
    const reply = response.choices[0]?.message;
    const calls = reply?.tool_calls;
    if (calls && calls.length > 0) {
      return calls[0].function.arguments;
    }
    return reply?.content ?? "";
  }
  /**
   * Produce a single embedding vector using the embeddings client and the
   * model returned by embeddingModel().
   */
  async embed(text) {
    const request = { model: this.embeddingModel(), input: text };
    const response = await this.embeddingsClient.embeddings.create(request);
    const vector = response.data[0]?.embedding;
    if (Array.isArray(vector)) {
      return vector;
    }
    throw new Error("OpenAI embeddings response did not include a vector.");
  }
  /** Embedding model to use: the configured one, else the OpenAI default. */
  embeddingModel() {
    const { configuredEmbeddingModel } = this;
    return configuredEmbeddingModel ?? EMBEDDING_MODELS.openai;
  }
};
|
|
646
|
+
|
|
647
|
+
// src/providers/ollama.ts
|
|
648
|
+
/**
 * Provider for an Ollama server, reached through its OpenAI-compatible API.
 * The API key is a fixed placeholder ("ollama").
 */
var OllamaProvider = class extends OpenAIProvider {
  /**
   * @param {string} model - Chat model identifier.
   * @param {{baseURL?: string, embeddingsBaseURL?: string, embeddingModel?: string}} [options]
   *   `options` may be omitted, matching the sibling providers. A missing
   *   baseURL falls back to OLLAMA_DEFAULT_HOST so requests never reach the
   *   OpenAI client's default endpoint with the placeholder key.
   */
  constructor(model, options = {}) {
    super(model, {
      baseURL: options.baseURL ?? OLLAMA_DEFAULT_HOST,
      apiKey: "ollama",
      embeddingsBaseURL: options.embeddingsBaseURL,
      embeddingModel: options.embeddingModel
    });
  }
  /** Embedding model to use: the configured one, else the Ollama default. */
  embeddingModel() {
    return this.configuredEmbeddingModel ?? EMBEDDING_MODELS.ollama;
  }
};
|
|
662
|
+
|
|
663
|
+
// src/providers/minimax.ts
|
|
664
|
+
var MINIMAX_BASE_URL = "https://api.minimax.io/v1";
|
|
665
|
+
/** OpenAIProvider configured with the fixed MiniMax base URL. */
var MiniMaxProvider = class extends OpenAIProvider {
  /**
   * @param {string} model - Chat model identifier.
   * @param {string} apiKey - MiniMax API key.
   */
  constructor(model, apiKey) {
    const minimaxOptions = { baseURL: MINIMAX_BASE_URL, apiKey };
    super(model, minimaxOptions);
  }
};
|
|
670
|
+
|
|
671
|
+
// src/utils/claude-settings.ts
|
|
672
|
+
import { readFileSync } from "fs";
|
|
673
|
+
import { homedir } from "os";
|
|
674
|
+
import path7 from "path";
|
|
675
|
+
var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
|
|
676
|
+
/**
 * Report whether a value is a non-null object (arrays included).
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
|
679
|
+
/**
 * Trim a string setting and treat blank or non-string values as absent.
 *
 * @param {unknown} value
 * @returns {string|undefined} Trimmed string, or undefined when blank or not a string.
 */
function normalize(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed.length === 0 ? undefined : trimmed;
}
|
|
684
|
+
/**
 * Resolve the Claude settings file path: the override environment variable
 * when it is set, otherwise `.claude/settings.json` under the home directory.
 *
 * @param {object} env - Environment variable map.
 * @returns {string}
 */
function resolveClaudeSettingsPath(env) {
  const override = env[CLAUDE_SETTINGS_PATH_ENV];
  if (override !== undefined && override !== null) {
    return override;
  }
  return path7.join(homedir(), ".claude", "settings.json");
}
|
|
687
|
+
/**
 * Read the Claude settings file as UTF-8 text.
 *
 * @param {string} settingsPath - Path of the settings file.
 * @returns {string|undefined} File contents, or undefined when the file does not exist.
 * @throws {Error} For any failure other than ENOENT. The original error is
 *   attached as `cause` so its code and stack stay available to callers.
 */
function readClaudeSettingsFile(settingsPath) {
  try {
    return readFileSync(settingsPath, "utf8");
  } catch (err) {
    // A missing settings file is an expected, non-error condition.
    if (typeof err === "object" && err !== null && err.code === "ENOENT") {
      return undefined;
    }
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to read Claude settings at "${settingsPath}": ${message}`, { cause: err });
  }
}
|
|
698
|
+
/**
 * Read the Anthropic-related values from the `env` section of the Claude
 * settings file.
 *
 * @param {object} [env] - Environment variable map used to locate the file.
 * @returns {{ANTHROPIC_API_KEY?: string, ANTHROPIC_AUTH_TOKEN?: string, ANTHROPIC_BASE_URL?: string, ANTHROPIC_MODEL?: string}|undefined}
 *   Normalised values, or undefined when the file is missing or empty, has no
 *   `env` object, or none of the four values is set.
 * @throws {Error} When the file cannot be read or is not valid JSON.
 */
function readClaudeSettingsEnv(env = process.env) {
  const settingsPath = resolveClaudeSettingsPath(env);
  const raw = readClaudeSettingsFile(settingsPath);
  if (!raw) {
    return undefined;
  }
  let settings;
  try {
    settings = JSON.parse(raw);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse Claude settings at "${settingsPath}": ${message}`);
  }
  const hasEnvSection = isRecord(settings) && isRecord(settings.env);
  if (!hasEnvSection) {
    return undefined;
  }
  const trackedKeys = ["ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_BASE_URL", "ANTHROPIC_MODEL"];
  const values = {};
  for (const key of trackedKeys) {
    values[key] = normalize(settings.env[key]);
  }
  const hasAnyValue = trackedKeys.some((key) => Boolean(values[key]));
  return hasAnyValue ? values : undefined;
}
|
|
723
|
+
/**
 * Best-effort variant of readClaudeSettingsEnv: any error is treated the
 * same as "no settings available".
 *
 * @param {object} env - Environment variable map.
 * @returns {object|undefined}
 */
function tryReadClaudeSettingsEnv(env) {
  let settings;
  try {
    settings = readClaudeSettingsEnv(env);
  } catch {
    settings = undefined;
  }
  return settings;
}
|
|
730
|
+
/**
 * Check that a base URL parses and uses http or https.
 *
 * @param {string} value - Candidate URL.
 * @returns {string} The trimmed URL.
 * @throws {Error} When the URL does not parse or uses another protocol.
 */
function validateAnthropicBaseURL(value) {
  const candidate = value.trim();
  let problem;
  try {
    const { protocol } = new URL(candidate);
    if (!["http:", "https:"].includes(protocol)) {
      problem = "Must use http:// or https:// protocol.";
    }
  } catch (err) {
    problem = err instanceof Error ? err.message : "Must be a valid http(s) URL.";
  }
  if (problem !== undefined) {
    throw new Error(`Invalid ANTHROPIC_BASE_URL: "${candidate}". ${problem}`);
  }
  return candidate;
}
|
|
743
|
+
/**
 * Resolve Anthropic credentials. Environment variables win over the Claude
 * settings file, and an API key wins over an auth token at each level.
 *
 * @param {object} [env] - Environment variable map.
 * @returns {{apiKey: string}|{authToken: string}|{}} Exactly one credential, or an empty object.
 * @throws {Error} When the settings file has to be consulted and cannot be read or parsed.
 */
function resolveAnthropicAuthFromEnv(env = process.env) {
  const choose = (apiKey, authToken) => {
    if (apiKey) return { apiKey };
    if (authToken) return { authToken };
    return undefined;
  };
  const fromEnvironment = choose(normalize(env.ANTHROPIC_API_KEY), normalize(env.ANTHROPIC_AUTH_TOKEN));
  if (fromEnvironment) {
    return fromEnvironment;
  }
  // The settings file is consulted only when the environment has no credential.
  const settings = readClaudeSettingsEnv(env);
  return choose(settings?.ANTHROPIC_API_KEY, settings?.ANTHROPIC_AUTH_TOKEN) ?? {};
}
|
|
753
|
+
/**
 * Resolve the Anthropic model: LLMWIKI_MODEL first, then ANTHROPIC_MODEL
 * from the Claude settings file (read best-effort).
 *
 * LLMWIKI_MODEL is normalised like the other environment values in this
 * module. A blank value counts as unset, so the caller's `??` default still
 * applies instead of an empty model id being sent to the API.
 *
 * @param {object} [env] - Environment variable map.
 * @returns {string|undefined} Model identifier, or undefined when none is configured.
 */
function resolveAnthropicModelFromEnv(env = process.env) {
  const explicitModel = normalize(env.LLMWIKI_MODEL);
  if (explicitModel !== undefined) {
    return explicitModel;
  }
  return tryReadClaudeSettingsEnv(env)?.ANTHROPIC_MODEL;
}
|
|
758
|
+
/**
 * Resolve and validate the Anthropic base URL: ANTHROPIC_BASE_URL from the
 * environment first, then from the Claude settings file (read best-effort).
 *
 * @param {object} [env] - Environment variable map.
 * @returns {string|undefined} Validated URL, or undefined when none is configured.
 * @throws {Error} When the configured URL fails validation.
 */
function resolveAnthropicBaseURLFromEnv(env = process.env) {
  // `||` keeps the settings lookup lazy: it runs only without an env value.
  const candidate = normalize(env.ANTHROPIC_BASE_URL) || tryReadClaudeSettingsEnv(env)?.ANTHROPIC_BASE_URL;
  return candidate ? validateAnthropicBaseURL(candidate) : undefined;
}
|
|
765
|
+
|
|
766
|
+
// src/utils/provider.ts
|
|
767
|
+
var SUPPORTED_PROVIDERS = /* @__PURE__ */ new Set(["anthropic", "openai", "ollama", "minimax"]);
|
|
768
|
+
/**
 * Construct the provider selected by getProviderName().
 *
 * @returns {object} Provider instance.
 * @throws {Error} When the name has no factory here.
 */
function getProvider() {
  const providerName = getProviderName();
  // Factories are thunks so only the selected provider reads its settings.
  const factories = new Map([
    ["anthropic", () => getAnthropicProvider()],
    [
      "openai",
      () => new OpenAIProvider(getModelForProvider("openai"), {
        baseURL: readOptionalEnv("OPENAI_BASE_URL"),
        embeddingsBaseURL: readOptionalEnv("OPENAI_EMBEDDINGS_BASE_URL"),
        embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
      })
    ],
    [
      "ollama",
      () => new OllamaProvider(getModelForProvider("ollama"), {
        baseURL: readOptionalEnv("OLLAMA_HOST") ?? OLLAMA_DEFAULT_HOST,
        embeddingsBaseURL: readOptionalEnv("OLLAMA_EMBEDDINGS_HOST"),
        embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
      })
    ],
    ["minimax", () => getMiniMaxProvider()]
  ]);
  const build = factories.get(providerName);
  if (!build) {
    throw new Error(`Unhandled provider: ${providerName}`);
  }
  return build();
}
|
|
791
|
+
/**
 * Read an environment variable, treating unset and blank values as absent.
 *
 * @param {string} name - Environment variable name.
 * @returns {string|undefined} Trimmed value, or undefined.
 */
function readOptionalEnv(name) {
  const trimmed = process.env[name]?.trim();
  if (!trimmed) {
    return undefined;
  }
  return trimmed;
}
|
|
795
|
+
/**
 * Pick the chat model for a provider: LLMWIKI_MODEL when set, otherwise the
 * provider's entry in PROVIDER_MODELS.
 *
 * A blank or whitespace-only LLMWIKI_MODEL counts as unset. `??` alone would
 * pass the empty string through as the model id.
 *
 * @param {string} providerName - Key into PROVIDER_MODELS.
 * @returns {string|undefined} Model identifier.
 */
function getModelForProvider(providerName) {
  const override = process.env.LLMWIKI_MODEL?.trim();
  return override ? override : PROVIDER_MODELS[providerName];
}
|
|
798
|
+
/**
 * Construct the MiniMax provider from MINIMAX_API_KEY.
 *
 * The key is trimmed before use, so a whitespace-only value is reported as
 * missing and stray padding is never sent to the API.
 *
 * @returns {object} MiniMaxProvider instance.
 * @throws {Error} When MINIMAX_API_KEY is unset or blank.
 */
function getMiniMaxProvider() {
  const apiKey = process.env.MINIMAX_API_KEY?.trim();
  if (!apiKey) {
    throw new Error(
      "MiniMax provider requires MINIMAX_API_KEY environment variable.\n  Set it with: export MINIMAX_API_KEY=your_key"
    );
  }
  return new MiniMaxProvider(getModelForProvider("minimax"), apiKey);
}
|
|
807
|
+
/**
 * Construct the Anthropic provider from environment and settings-file
 * configuration. Model, base URL and credentials are resolved in that order.
 *
 * @returns {object} AnthropicProvider instance.
 */
function getAnthropicProvider() {
  const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
  const clientOptions = { baseURL: resolveAnthropicBaseURLFromEnv() };
  Object.assign(clientOptions, resolveAnthropicAuthFromEnv());
  return new AnthropicProvider(model, clientOptions);
}
|
|
816
|
+
/**
 * Resolve the active provider name from LLMWIKI_PROVIDER.
 *
 * The value is trimmed and lower-cased before validation. An unset or blank
 * variable selects DEFAULT_PROVIDER rather than failing as an unknown
 * provider named "".
 *
 * @returns {string} A member of SUPPORTED_PROVIDERS.
 * @throws {Error} When the configured name is not supported.
 */
function getProviderName() {
  const configured = process.env.LLMWIKI_PROVIDER?.trim().toLowerCase();
  const providerName = configured ? configured : DEFAULT_PROVIDER;
  if (!SUPPORTED_PROVIDERS.has(providerName)) {
    throw new Error(
      `Unknown provider "${providerName}". Supported: ${[...SUPPORTED_PROVIDERS].join(", ")}`
    );
  }
  return providerName;
}
|
|
825
|
+
/**
 * Return the currently selected provider name; delegates to getProviderName().
 *
 * @returns {string}
 */
function getActiveProviderName() {
  const activeName = getProviderName();
  return activeName;
}
|
|
828
|
+
|
|
829
|
+
// src/utils/llm.ts
|
|
308
830
|
/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
311
833
|
async function callClaude(options) {
|
|
312
834
|
const { system, messages, tools, maxTokens = 4096, stream = false, onToken } = options;
|
|
313
|
-
const
|
|
835
|
+
const provider = getProvider();
|
|
314
836
|
for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
|
|
315
837
|
try {
|
|
316
838
|
if (stream) {
|
|
317
|
-
return await
|
|
839
|
+
return await provider.stream(system, messages, maxTokens, onToken);
|
|
318
840
|
}
|
|
319
841
|
if (tools && tools.length > 0) {
|
|
320
|
-
return await
|
|
842
|
+
return await provider.toolCall(system, messages, tools, maxTokens);
|
|
321
843
|
}
|
|
322
|
-
return await
|
|
844
|
+
return await provider.complete(system, messages, maxTokens);
|
|
323
845
|
} catch (error2) {
|
|
324
846
|
if (attempt === RETRY_COUNT) throw error2;
|
|
325
847
|
const delayMs = RETRY_BASE_MS * Math.pow(RETRY_MULTIPLIER, attempt);
|
|
@@ -331,57 +853,10 @@ async function callClaude(options) {
|
|
|
331
853
|
}
|
|
332
854
|
throw new Error("Unreachable");
|
|
333
855
|
}
|
|
334
|
-
async function callClaudeStreaming(anthropic, system, messages, maxTokens, onToken) {
|
|
335
|
-
const stream = anthropic.messages.stream({
|
|
336
|
-
model: MODEL,
|
|
337
|
-
max_tokens: maxTokens,
|
|
338
|
-
system,
|
|
339
|
-
messages
|
|
340
|
-
});
|
|
341
|
-
let fullText = "";
|
|
342
|
-
for await (const event of stream) {
|
|
343
|
-
if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
|
|
344
|
-
fullText += event.delta.text;
|
|
345
|
-
onToken?.(event.delta.text);
|
|
346
|
-
}
|
|
347
|
-
}
|
|
348
|
-
return fullText;
|
|
349
|
-
}
|
|
350
|
-
async function callClaudeToolUse(anthropic, system, messages, tools, maxTokens) {
|
|
351
|
-
const response = await anthropic.messages.create({
|
|
352
|
-
model: MODEL,
|
|
353
|
-
max_tokens: maxTokens,
|
|
354
|
-
system,
|
|
355
|
-
messages,
|
|
356
|
-
tools
|
|
357
|
-
});
|
|
358
|
-
const toolBlock = response.content.find((block) => block.type === "tool_use");
|
|
359
|
-
if (toolBlock && toolBlock.type === "tool_use") {
|
|
360
|
-
return JSON.stringify(toolBlock.input);
|
|
361
|
-
}
|
|
362
|
-
const textBlock = response.content.find((block) => block.type === "text");
|
|
363
|
-
if (textBlock && textBlock.type === "text") {
|
|
364
|
-
return textBlock.text;
|
|
365
|
-
}
|
|
366
|
-
return "";
|
|
367
|
-
}
|
|
368
|
-
async function callClaudeBasic(anthropic, system, messages, maxTokens) {
|
|
369
|
-
const response = await anthropic.messages.create({
|
|
370
|
-
model: MODEL,
|
|
371
|
-
max_tokens: maxTokens,
|
|
372
|
-
system,
|
|
373
|
-
messages
|
|
374
|
-
});
|
|
375
|
-
const textBlock = response.content.find((block) => block.type === "text");
|
|
376
|
-
if (textBlock && textBlock.type === "text") {
|
|
377
|
-
return textBlock.text;
|
|
378
|
-
}
|
|
379
|
-
return "";
|
|
380
|
-
}
|
|
381
856
|
|
|
382
857
|
// src/utils/lock.ts
|
|
383
|
-
import { open, readFile as
|
|
384
|
-
import
|
|
858
|
+
import { open, readFile as readFile5, unlink, mkdir as mkdir4 } from "fs/promises";
|
|
859
|
+
import path8 from "path";
|
|
385
860
|
var RECLAIM_SUFFIX = ".reclaim";
|
|
386
861
|
var MAX_ACQUIRE_ATTEMPTS = 2;
|
|
387
862
|
function isProcessAlive(pid) {
|
|
@@ -393,8 +868,8 @@ function isProcessAlive(pid) {
|
|
|
393
868
|
}
|
|
394
869
|
}
|
|
395
870
|
async function acquireLock(root) {
|
|
396
|
-
const lockPath =
|
|
397
|
-
await mkdir4(
|
|
871
|
+
const lockPath = path8.join(root, LOCK_FILE);
|
|
872
|
+
await mkdir4(path8.join(root, LLMWIKI_DIR), { recursive: true });
|
|
398
873
|
for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
|
|
399
874
|
const created = await tryCreateLock(lockPath);
|
|
400
875
|
if (created) return true;
|
|
@@ -457,7 +932,7 @@ async function tryCreateLock(lockPath) {
|
|
|
457
932
|
}
|
|
458
933
|
async function isLockStale(lockPath) {
|
|
459
934
|
try {
|
|
460
|
-
const content = await
|
|
935
|
+
const content = await readFile5(lockPath, "utf-8");
|
|
461
936
|
const pid = parseInt(content.trim(), 10);
|
|
462
937
|
if (isNaN(pid)) return true;
|
|
463
938
|
return !isProcessAlive(pid);
|
|
@@ -466,7 +941,7 @@ async function isLockStale(lockPath) {
|
|
|
466
941
|
}
|
|
467
942
|
}
|
|
468
943
|
async function releaseLock(root) {
|
|
469
|
-
const lockPath =
|
|
944
|
+
const lockPath = path8.join(root, LOCK_FILE);
|
|
470
945
|
try {
|
|
471
946
|
await unlink(lockPath);
|
|
472
947
|
} catch {
|
|
@@ -474,6 +949,12 @@ async function releaseLock(root) {
|
|
|
474
949
|
}
|
|
475
950
|
|
|
476
951
|
// src/compiler/prompts.ts
|
|
952
|
+
// Enum values offered to the model in the extraction tool schema. Derived
// from VALID_PROVENANCE_STATES (defined earlier in this file) so the schema
// and the frontmatter parser cannot drift apart.
var PROVENANCE_STATE_VALUES = Array.from(VALID_PROVENANCE_STATES);
|
|
477
958
|
var CONCEPT_EXTRACTION_TOOL = {
|
|
478
959
|
name: "extract_concepts",
|
|
479
960
|
description: "Extract knowledge concepts from a source document",
|
|
@@ -496,6 +977,36 @@ var CONCEPT_EXTRACTION_TOOL = {
|
|
|
496
977
|
is_new: {
|
|
497
978
|
type: "boolean",
|
|
498
979
|
description: "True if this is a new concept not in existing wiki"
|
|
980
|
+
},
|
|
981
|
+
tags: {
|
|
982
|
+
type: "array",
|
|
983
|
+
items: { type: "string" },
|
|
984
|
+
description: "2-4 categorical tags for organizing this concept (e.g., 'machine-learning', 'optimization')"
|
|
985
|
+
},
|
|
986
|
+
confidence: {
|
|
987
|
+
type: "number",
|
|
988
|
+
description: "Confidence in this concept on a 0..1 scale (1 = directly stated, 0 = highly speculative)."
|
|
989
|
+
},
|
|
990
|
+
provenance_state: {
|
|
991
|
+
type: "string",
|
|
992
|
+
enum: PROVENANCE_STATE_VALUES,
|
|
993
|
+
description: "How this concept was produced: 'extracted' (direct from source), 'merged' (synthesised across sources), 'inferred' (model deduction), or 'ambiguous' (sources disagree)."
|
|
994
|
+
},
|
|
995
|
+
contradicted_by: {
|
|
996
|
+
type: "array",
|
|
997
|
+
items: {
|
|
998
|
+
type: "object",
|
|
999
|
+
properties: {
|
|
1000
|
+
slug: { type: "string", description: "Slug of the contradicting concept." },
|
|
1001
|
+
reason: { type: "string", description: "Brief reason for the contradiction." }
|
|
1002
|
+
},
|
|
1003
|
+
required: ["slug"]
|
|
1004
|
+
},
|
|
1005
|
+
description: "Slugs of other concepts whose evidence contradicts this one."
|
|
1006
|
+
},
|
|
1007
|
+
inferred_paragraphs: {
|
|
1008
|
+
type: "integer",
|
|
1009
|
+
description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
|
|
499
1010
|
}
|
|
500
1011
|
},
|
|
501
1012
|
required: ["concept", "summary", "is_new"]
|
|
@@ -517,6 +1028,17 @@ ${existingIndex}` : "\n\nNo existing wiki pages yet.";
|
|
|
517
1028
|
"Each concept should be a standalone topic that someone might look up.",
|
|
518
1029
|
"Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
|
|
519
1030
|
"Use the extract_concepts tool to return your findings.",
|
|
1031
|
+
"",
|
|
1032
|
+
"For every concept, emit provenance metadata so downstream tools can reason",
|
|
1033
|
+
"about reliability:",
|
|
1034
|
+
" - confidence: 0..1 \u2014 how certain you are the source supports this concept.",
|
|
1035
|
+
" - provenance_state: 'extracted' if directly stated, 'merged' if synthesised",
|
|
1036
|
+
" from multiple parts of the source, 'inferred' if reasoned from context,",
|
|
1037
|
+
" or 'ambiguous' if the source is contradictory or unclear.",
|
|
1038
|
+
" - contradicted_by: slugs of other concepts (in this batch or the index)",
|
|
1039
|
+
" whose evidence conflicts with this one.",
|
|
1040
|
+
" - inferred_paragraphs: estimated number of paragraphs in the resulting",
|
|
1041
|
+
" page that will be inferred rather than directly citable.",
|
|
520
1042
|
indexSection,
|
|
521
1043
|
"\n\n--- SOURCE DOCUMENT ---\n\n",
|
|
522
1044
|
sourceContent
|
|
@@ -539,64 +1061,60 @@ ${relatedPages}` : "";
|
|
|
539
1061
|
"Include a ## Sources section at the end listing the source document.",
|
|
540
1062
|
"Suggest [[wikilinks]] to related concepts where appropriate.",
|
|
541
1063
|
"Write in a neutral, informative tone. Be concise but thorough.",
|
|
1064
|
+
"",
|
|
1065
|
+
"Source attribution: at the end of each prose paragraph, append a citation",
|
|
1066
|
+
"marker showing which source file(s) the paragraph drew from.",
|
|
1067
|
+
"Format: ^[filename.md] for single-source, ^[source-a.md, source-b.md] for multi-source.",
|
|
1068
|
+
"Place citations only at the end of prose paragraphs \u2014 not on headings, list items, or code blocks.",
|
|
1069
|
+
"Source filenames are visible as `--- SOURCE: filename.md ---` headers in the content below.",
|
|
1070
|
+
"",
|
|
1071
|
+
"If a paragraph is your inference rather than a direct extraction, leave it",
|
|
1072
|
+
"uncited \u2014 downstream lint rules will count uncited paragraphs as 'inferred'",
|
|
1073
|
+
"to compute the page's provenance metadata.",
|
|
542
1074
|
existingSection,
|
|
543
1075
|
relatedSection,
|
|
544
1076
|
"\n\n--- SOURCE MATERIAL ---\n\n",
|
|
545
1077
|
sourceContent
|
|
546
1078
|
].join("\n");
|
|
547
1079
|
}
|
|
548
|
-
function
|
|
549
|
-
|
|
550
|
-
const parsed = JSON.parse(toolOutput);
|
|
551
|
-
const concepts = parsed.concepts ?? [];
|
|
552
|
-
return concepts.filter(
|
|
553
|
-
(c) => typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean"
|
|
554
|
-
);
|
|
555
|
-
} catch {
|
|
556
|
-
return [];
|
|
557
|
-
}
|
|
558
|
-
}
|
|
559
|
-
|
|
560
|
-
// src/compiler/hasher.ts
|
|
561
|
-
import { createHash } from "crypto";
|
|
562
|
-
import { readFile as readFile5, readdir } from "fs/promises";
|
|
563
|
-
import path6 from "path";
|
|
564
|
-
async function hashFile(filePath) {
|
|
565
|
-
const content = await readFile5(filePath, "utf-8");
|
|
566
|
-
return createHash("sha256").update(content).digest("hex");
|
|
1080
|
+
function isValidRawConcept(c) {
|
|
1081
|
+
return typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean" && (c.tags === void 0 || Array.isArray(c.tags));
|
|
567
1082
|
}
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
const
|
|
571
|
-
const
|
|
572
|
-
|
|
573
|
-
const
|
|
574
|
-
|
|
1083
|
+
function coerceContradictedBy(raw) {
|
|
1084
|
+
if (!Array.isArray(raw)) return void 0;
|
|
1085
|
+
const refs = [];
|
|
1086
|
+
for (const entry of raw) {
|
|
1087
|
+
if (!entry || typeof entry !== "object") continue;
|
|
1088
|
+
const obj = entry;
|
|
1089
|
+
if (typeof obj.slug !== "string" || obj.slug.trim().length === 0) continue;
|
|
1090
|
+
const ref = { slug: obj.slug.trim() };
|
|
1091
|
+
if (typeof obj.reason === "string") ref.reason = obj.reason;
|
|
1092
|
+
refs.push(ref);
|
|
575
1093
|
}
|
|
576
|
-
|
|
577
|
-
changes.push(...deletedChanges);
|
|
578
|
-
return changes;
|
|
1094
|
+
return refs.length > 0 ? refs : void 0;
|
|
579
1095
|
}
|
|
580
|
-
|
|
1096
|
+
function mapRawConcept(c) {
|
|
1097
|
+
const provenance = typeof c.provenance_state === "string" && PROVENANCE_STATE_VALUES.includes(c.provenance_state) ? c.provenance_state : void 0;
|
|
1098
|
+
return {
|
|
1099
|
+
concept: c.concept,
|
|
1100
|
+
summary: c.summary,
|
|
1101
|
+
is_new: c.is_new,
|
|
1102
|
+
tags: Array.isArray(c.tags) ? c.tags : void 0,
|
|
1103
|
+
confidence: typeof c.confidence === "number" ? c.confidence : void 0,
|
|
1104
|
+
provenanceState: provenance,
|
|
1105
|
+
contradictedBy: coerceContradictedBy(c.contradicted_by),
|
|
1106
|
+
inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
|
|
1107
|
+
};
|
|
1108
|
+
}
|
|
1109
|
+
function parseConcepts(toolOutput) {
|
|
581
1110
|
try {
|
|
582
|
-
const
|
|
583
|
-
|
|
1111
|
+
const parsed = JSON.parse(toolOutput);
|
|
1112
|
+
const concepts = parsed.concepts ?? [];
|
|
1113
|
+
return concepts.filter(isValidRawConcept).map(mapRawConcept);
|
|
584
1114
|
} catch {
|
|
585
1115
|
return [];
|
|
586
1116
|
}
|
|
587
1117
|
}
|
|
588
|
-
async function classifyFile(root, file, prevState) {
|
|
589
|
-
const filePath = path6.join(root, SOURCES_DIR, file);
|
|
590
|
-
const hash = await hashFile(filePath);
|
|
591
|
-
const prev = prevState.sources[file];
|
|
592
|
-
if (!prev) return "new";
|
|
593
|
-
if (prev.hash !== hash) return "changed";
|
|
594
|
-
return "unchanged";
|
|
595
|
-
}
|
|
596
|
-
function findDeletedFiles(currentFiles, prevState) {
|
|
597
|
-
const currentSet = new Set(currentFiles);
|
|
598
|
-
return Object.keys(prevState.sources).filter((file) => !currentSet.has(file)).map((file) => ({ file, status: "deleted" }));
|
|
599
|
-
}
|
|
600
1118
|
|
|
601
1119
|
// src/compiler/deps.ts
|
|
602
1120
|
function buildConceptToSourcesMap(sources) {
|
|
@@ -613,28 +1131,37 @@ function buildConceptToSourcesMap(sources) {
|
|
|
613
1131
|
}
|
|
614
1132
|
return conceptMap;
|
|
615
1133
|
}
|
|
616
|
-
function
|
|
617
|
-
const
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
const deletedFiles = new Set(
|
|
621
|
-
directChanges.filter((c) => c.status === "deleted").map((c) => c.file)
|
|
1134
|
+
function filesByStatus(changes, ...statuses) {
|
|
1135
|
+
const statusSet = new Set(statuses);
|
|
1136
|
+
return new Set(
|
|
1137
|
+
changes.filter((c) => statusSet.has(c.status)).map((c) => c.file)
|
|
622
1138
|
);
|
|
1139
|
+
}
|
|
1140
|
+
function collectSharedContributors(sourceFile, state, conceptMap, excludeSets, out) {
|
|
1141
|
+
const sourceEntry = state.sources[sourceFile];
|
|
1142
|
+
if (!sourceEntry) return;
|
|
1143
|
+
for (const slug of sourceEntry.concepts) {
|
|
1144
|
+
const contributors = conceptMap.get(slug);
|
|
1145
|
+
if (!contributors || contributors.length < 2) continue;
|
|
1146
|
+
for (const contributor of contributors) {
|
|
1147
|
+
const isExcluded = excludeSets.some((s) => s.has(contributor));
|
|
1148
|
+
if (!isExcluded) out.add(contributor);
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
}
|
|
1152
|
+
function findAffectedSources(state, directChanges) {
|
|
1153
|
+
const changedFiles = filesByStatus(directChanges, "new", "changed");
|
|
1154
|
+
const deletedFiles = filesByStatus(directChanges, "deleted");
|
|
623
1155
|
const conceptMap = buildConceptToSourcesMap(state.sources);
|
|
624
1156
|
const affected = /* @__PURE__ */ new Set();
|
|
625
1157
|
for (const changedFile of changedFiles) {
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
if (!skip) {
|
|
634
|
-
affected.add(contributor);
|
|
635
|
-
}
|
|
636
|
-
}
|
|
637
|
-
}
|
|
1158
|
+
collectSharedContributors(
|
|
1159
|
+
changedFile,
|
|
1160
|
+
state,
|
|
1161
|
+
conceptMap,
|
|
1162
|
+
[changedFiles, deletedFiles, affected],
|
|
1163
|
+
affected
|
|
1164
|
+
);
|
|
638
1165
|
}
|
|
639
1166
|
return Array.from(affected);
|
|
640
1167
|
}
|
|
@@ -676,36 +1203,36 @@ async function persistFrozenSlugs(root, frozenSlugs, successfulExtractions) {
|
|
|
676
1203
|
const stateToSave = { ...currentState, frozenSlugs: Array.from(remaining) };
|
|
677
1204
|
await writeState(root, stateToSave);
|
|
678
1205
|
}
|
|
679
|
-
function
|
|
680
|
-
const compilingFiles = new Set(
|
|
681
|
-
allChanges.filter((c) => c.status === "new" || c.status === "changed").map((c) => c.file)
|
|
682
|
-
);
|
|
683
|
-
const deletedFiles = new Set(
|
|
684
|
-
allChanges.filter((c) => c.status === "deleted").map((c) => c.file)
|
|
685
|
-
);
|
|
686
|
-
const conceptMap = buildConceptToSourcesMap(state.sources);
|
|
1206
|
+
function collectFreshSlugs(extractions, state) {
|
|
687
1207
|
const freshSlugs = /* @__PURE__ */ new Set();
|
|
688
1208
|
for (const result of extractions) {
|
|
689
1209
|
const oldConcepts = new Set(state.sources[result.sourceFile]?.concepts ?? []);
|
|
690
1210
|
for (const c of result.concepts) {
|
|
691
1211
|
const slug = slugify(c.concept);
|
|
692
|
-
if (!oldConcepts.has(slug))
|
|
693
|
-
freshSlugs.add(slug);
|
|
694
|
-
}
|
|
1212
|
+
if (!oldConcepts.has(slug)) freshSlugs.add(slug);
|
|
695
1213
|
}
|
|
696
1214
|
}
|
|
1215
|
+
return freshSlugs;
|
|
1216
|
+
}
|
|
1217
|
+
function findSlugOwners(slugs, conceptMap, excludeSets) {
|
|
697
1218
|
const affected = /* @__PURE__ */ new Set();
|
|
698
|
-
for (const slug of
|
|
1219
|
+
for (const slug of slugs) {
|
|
699
1220
|
const owners = conceptMap.get(slug);
|
|
700
1221
|
if (!owners) continue;
|
|
701
1222
|
for (const owner of owners) {
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
}
|
|
1223
|
+
const isExcluded = excludeSets.some((s) => s.has(owner));
|
|
1224
|
+
if (!isExcluded) affected.add(owner);
|
|
705
1225
|
}
|
|
706
1226
|
}
|
|
707
1227
|
return Array.from(affected);
|
|
708
1228
|
}
|
|
1229
|
+
function findLateAffectedSources(extractions, state, allChanges) {
|
|
1230
|
+
const compilingFiles = filesByStatus(allChanges, "new", "changed");
|
|
1231
|
+
const deletedFiles = filesByStatus(allChanges, "deleted");
|
|
1232
|
+
const conceptMap = buildConceptToSourcesMap(state.sources);
|
|
1233
|
+
const freshSlugs = collectFreshSlugs(extractions, state);
|
|
1234
|
+
return findSlugOwners(freshSlugs, conceptMap, [compilingFiles, deletedFiles]);
|
|
1235
|
+
}
|
|
709
1236
|
function findSharedConcepts(sourceFile, state) {
|
|
710
1237
|
const shared = /* @__PURE__ */ new Set();
|
|
711
1238
|
const sourceEntry = state.sources[sourceFile];
|
|
@@ -735,7 +1262,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
|
|
|
735
1262
|
}
|
|
736
1263
|
|
|
737
1264
|
// src/compiler/orphan.ts
|
|
738
|
-
import
|
|
1265
|
+
import path9 from "path";
|
|
739
1266
|
async function markOrphaned(root, sourceFile, state) {
|
|
740
1267
|
const sourceEntry = state.sources[sourceFile];
|
|
741
1268
|
if (!sourceEntry) return;
|
|
@@ -761,7 +1288,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
|
|
|
761
1288
|
}
|
|
762
1289
|
}
|
|
763
1290
|
async function orphanPage(root, slug, reason) {
|
|
764
|
-
const pagePath =
|
|
1291
|
+
const pagePath = path9.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
765
1292
|
const content = await safeReadFile(pagePath);
|
|
766
1293
|
if (!content) return;
|
|
767
1294
|
const { meta } = parseFrontmatter(content);
|
|
@@ -773,16 +1300,16 @@ async function orphanPage(root, slug, reason) {
|
|
|
773
1300
|
|
|
774
1301
|
// src/compiler/resolver.ts
|
|
775
1302
|
import { readdir as readdir2, readFile as readFile6 } from "fs/promises";
|
|
776
|
-
import
|
|
1303
|
+
import path10 from "path";
|
|
777
1304
|
import { existsSync as existsSync2 } from "fs";
|
|
778
1305
|
async function buildTitleIndex(root) {
|
|
779
|
-
const conceptsDir =
|
|
1306
|
+
const conceptsDir = path10.join(root, CONCEPTS_DIR);
|
|
780
1307
|
if (!existsSync2(conceptsDir)) return [];
|
|
781
1308
|
const files = await readdir2(conceptsDir);
|
|
782
1309
|
const pages = [];
|
|
783
1310
|
for (const file of files) {
|
|
784
1311
|
if (!file.endsWith(".md")) continue;
|
|
785
|
-
const filePath =
|
|
1312
|
+
const filePath = path10.join(conceptsDir, file);
|
|
786
1313
|
const content = await readFile6(filePath, "utf-8");
|
|
787
1314
|
const { meta } = parseFrontmatter(content);
|
|
788
1315
|
if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
|
|
@@ -802,25 +1329,41 @@ function isInsideWikilink(text, position) {
|
|
|
802
1329
|
const closeBefore = text.indexOf("]]", before);
|
|
803
1330
|
return closeBefore >= position;
|
|
804
1331
|
}
|
|
1332
|
+
function isInsideCitation(text, position) {
|
|
1333
|
+
const before = text.lastIndexOf("^[", position);
|
|
1334
|
+
const after = text.indexOf("]", position);
|
|
1335
|
+
if (before === -1 || after === -1) return false;
|
|
1336
|
+
const closeBefore = text.indexOf("]", before);
|
|
1337
|
+
return closeBefore >= position;
|
|
1338
|
+
}
|
|
805
1339
|
function isWordBoundary(text, start, end) {
|
|
806
1340
|
const before = start === 0 || /[\s,.:;!?()\[\]{}/"']/.test(text[start - 1]);
|
|
807
1341
|
const after = end >= text.length || /[\s,.:;!?()\[\]{}/"']/.test(text[end]);
|
|
808
1342
|
return before && after;
|
|
809
1343
|
}
|
|
810
|
-
function
|
|
1344
|
+
function findTitleMatches(text, title) {
|
|
1345
|
+
const escaped = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
1346
|
+
const regex = new RegExp(escaped, "gi");
|
|
1347
|
+
const matches = [];
|
|
1348
|
+
let match;
|
|
1349
|
+
while ((match = regex.exec(text)) !== null) {
|
|
1350
|
+
matches.push({ start: match.index, end: match.index + match[0].length });
|
|
1351
|
+
}
|
|
1352
|
+
return matches;
|
|
1353
|
+
}
|
|
1354
|
+
function isLinkablePosition(text, start, end) {
|
|
1355
|
+
if (isInsideWikilink(text, start)) return false;
|
|
1356
|
+
if (isInsideCitation(text, start)) return false;
|
|
1357
|
+
return isWordBoundary(text, start, end);
|
|
1358
|
+
}
|
|
1359
|
+
function addWikilinks(body, titles, selfTitle) {
|
|
811
1360
|
let result = body;
|
|
1361
|
+
const selfLower = selfTitle.toLowerCase();
|
|
812
1362
|
for (const page of titles) {
|
|
813
|
-
if (page.title.toLowerCase() ===
|
|
814
|
-
const
|
|
815
|
-
const regex = new RegExp(escaped, "gi");
|
|
816
|
-
let match;
|
|
817
|
-
const matches = [];
|
|
818
|
-
while ((match = regex.exec(result)) !== null) {
|
|
819
|
-
matches.push({ start: match.index, end: match.index + match[0].length });
|
|
820
|
-
}
|
|
1363
|
+
if (page.title.toLowerCase() === selfLower) continue;
|
|
1364
|
+
const matches = findTitleMatches(result, page.title);
|
|
821
1365
|
for (const m of matches.reverse()) {
|
|
822
|
-
if (
|
|
823
|
-
if (!isWordBoundary(result, m.start, m.end)) continue;
|
|
1366
|
+
if (!isLinkablePosition(result, m.start, m.end)) continue;
|
|
824
1367
|
result = result.slice(0, m.start) + `[[${page.title}]]` + result.slice(m.end);
|
|
825
1368
|
}
|
|
826
1369
|
}
|
|
@@ -876,41 +1419,43 @@ async function linkPage(page, titleIndex) {
|
|
|
876
1419
|
|
|
877
1420
|
// src/compiler/indexgen.ts
|
|
878
1421
|
import { readdir as readdir3 } from "fs/promises";
|
|
879
|
-
import
|
|
1422
|
+
import path11 from "path";
|
|
880
1423
|
async function generateIndex(root) {
|
|
881
1424
|
status("*", info("Generating index..."));
|
|
882
|
-
const conceptsPath =
|
|
883
|
-
const queriesPath =
|
|
1425
|
+
const conceptsPath = path11.join(root, CONCEPTS_DIR);
|
|
1426
|
+
const queriesPath = path11.join(root, QUERIES_DIR);
|
|
884
1427
|
const concepts = await collectPageSummaries(conceptsPath);
|
|
885
1428
|
const queries = await collectPageSummaries(queriesPath);
|
|
886
1429
|
concepts.sort((a, b) => a.title.localeCompare(b.title));
|
|
887
1430
|
queries.sort((a, b) => a.title.localeCompare(b.title));
|
|
888
1431
|
const indexContent = buildIndexContent(concepts, queries);
|
|
889
|
-
const indexPath =
|
|
1432
|
+
const indexPath = path11.join(root, INDEX_FILE);
|
|
890
1433
|
await atomicWrite(indexPath, indexContent);
|
|
891
1434
|
const total = concepts.length + queries.length;
|
|
892
1435
|
status("+", success(`Index updated with ${total} pages.`));
|
|
893
1436
|
}
|
|
894
|
-
async function
|
|
1437
|
+
async function scanWikiPages(dirPath) {
|
|
895
1438
|
let files;
|
|
896
1439
|
try {
|
|
897
|
-
files = await readdir3(
|
|
1440
|
+
files = await readdir3(dirPath);
|
|
898
1441
|
} catch {
|
|
899
1442
|
return [];
|
|
900
1443
|
}
|
|
901
|
-
const
|
|
1444
|
+
const scanned = [];
|
|
902
1445
|
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
903
|
-
const content = await safeReadFile(
|
|
1446
|
+
const content = await safeReadFile(path11.join(dirPath, file));
|
|
904
1447
|
const { meta } = parseFrontmatter(content);
|
|
905
|
-
|
|
906
|
-
pages.push({
|
|
907
|
-
title: meta.title,
|
|
908
|
-
slug: file.replace(/\.md$/, ""),
|
|
909
|
-
summary: typeof meta.summary === "string" ? meta.summary : ""
|
|
910
|
-
});
|
|
911
|
-
}
|
|
1448
|
+
scanned.push({ slug: file.replace(/\.md$/, ""), meta });
|
|
912
1449
|
}
|
|
913
|
-
return
|
|
1450
|
+
return scanned;
|
|
1451
|
+
}
|
|
1452
|
+
async function collectPageSummaries(conceptsPath) {
|
|
1453
|
+
const scanned = await scanWikiPages(conceptsPath);
|
|
1454
|
+
return scanned.filter(({ meta }) => meta.title && typeof meta.title === "string" && !meta.orphaned).map(({ slug, meta }) => ({
|
|
1455
|
+
title: meta.title,
|
|
1456
|
+
slug,
|
|
1457
|
+
summary: typeof meta.summary === "string" ? meta.summary : ""
|
|
1458
|
+
}));
|
|
914
1459
|
}
|
|
915
1460
|
function stripWikilinks(text) {
|
|
916
1461
|
return text.replace(/\[\[([^\]]+)\]\]/g, "$1");
|
|
@@ -933,84 +1478,611 @@ function buildIndexContent(concepts, queries) {
|
|
|
933
1478
|
return lines.join("\n");
|
|
934
1479
|
}
|
|
935
1480
|
|
|
1481
|
+
// src/compiler/obsidian.ts
|
|
1482
|
+
import { readdir as readdir4 } from "fs/promises";
|
|
1483
|
+
import path12 from "path";
|
|
1484
|
+
var ABBREVIATION_MIN_WORDS = 3;
|
|
1485
|
+
var SWAP_CONJUNCTIONS = [" and ", " or "];
|
|
1486
|
+
function addObsidianMeta(frontmatter, conceptTitle, tags) {
|
|
1487
|
+
frontmatter.tags = tags;
|
|
1488
|
+
frontmatter.aliases = generateAliases(conceptTitle);
|
|
1489
|
+
}
|
|
1490
|
+
function generateAliases(title) {
|
|
1491
|
+
const aliases = [];
|
|
1492
|
+
const slug = slugify(title);
|
|
1493
|
+
if (slug !== title) {
|
|
1494
|
+
aliases.push(slug);
|
|
1495
|
+
}
|
|
1496
|
+
const swapAlias = generateSwapAlias(title);
|
|
1497
|
+
if (swapAlias) {
|
|
1498
|
+
aliases.push(swapAlias);
|
|
1499
|
+
}
|
|
1500
|
+
const abbreviation = generateAbbreviation(title);
|
|
1501
|
+
if (abbreviation) {
|
|
1502
|
+
aliases.push(abbreviation);
|
|
1503
|
+
}
|
|
1504
|
+
return aliases;
|
|
1505
|
+
}
|
|
1506
|
+
function generateSwapAlias(title) {
|
|
1507
|
+
for (const conjunction of SWAP_CONJUNCTIONS) {
|
|
1508
|
+
const index = title.toLowerCase().indexOf(conjunction);
|
|
1509
|
+
if (index === -1) continue;
|
|
1510
|
+
const before = title.slice(0, index);
|
|
1511
|
+
const after = title.slice(index + conjunction.length);
|
|
1512
|
+
const originalConjunction = title.slice(index, index + conjunction.length);
|
|
1513
|
+
return `${after}${originalConjunction}${before}`;
|
|
1514
|
+
}
|
|
1515
|
+
return null;
|
|
1516
|
+
}
|
|
1517
|
+
function generateAbbreviation(title) {
|
|
1518
|
+
const words = title.split(/\s+/);
|
|
1519
|
+
if (words.length < ABBREVIATION_MIN_WORDS) return null;
|
|
1520
|
+
const abbreviation = words.map((w) => w[0].toUpperCase()).join("");
|
|
1521
|
+
if (abbreviation === title) return null;
|
|
1522
|
+
return abbreviation;
|
|
1523
|
+
}
|
|
1524
|
+
async function generateMOC(root) {
|
|
1525
|
+
const conceptsPath = path12.join(root, CONCEPTS_DIR);
|
|
1526
|
+
const pages = await loadConceptPages(conceptsPath);
|
|
1527
|
+
const tagGroups = groupPagesByTag(pages);
|
|
1528
|
+
const content = buildMOCContent(tagGroups);
|
|
1529
|
+
await atomicWrite(path12.join(root, MOC_FILE), content);
|
|
1530
|
+
}
|
|
1531
|
+
async function loadConceptPages(conceptsPath) {
|
|
1532
|
+
let files;
|
|
1533
|
+
try {
|
|
1534
|
+
files = await readdir4(conceptsPath);
|
|
1535
|
+
} catch {
|
|
1536
|
+
return [];
|
|
1537
|
+
}
|
|
1538
|
+
const pages = [];
|
|
1539
|
+
for (const file of files) {
|
|
1540
|
+
if (!file.endsWith(".md")) continue;
|
|
1541
|
+
const content = await safeReadFile(path12.join(conceptsPath, file));
|
|
1542
|
+
if (!content) continue;
|
|
1543
|
+
const { meta } = parseFrontmatter(content);
|
|
1544
|
+
if (meta.orphaned) continue;
|
|
1545
|
+
const title = typeof meta.title === "string" ? meta.title : file.replace(/\.md$/, "");
|
|
1546
|
+
const tags = Array.isArray(meta.tags) ? meta.tags : [];
|
|
1547
|
+
pages.push({ title, tags });
|
|
1548
|
+
}
|
|
1549
|
+
return pages;
|
|
1550
|
+
}
|
|
1551
|
+
function groupPagesByTag(pages) {
|
|
1552
|
+
const groups = /* @__PURE__ */ new Map();
|
|
1553
|
+
for (const page of pages) {
|
|
1554
|
+
if (page.tags.length === 0) {
|
|
1555
|
+
appendToGroup(groups, "Uncategorized", page.title);
|
|
1556
|
+
continue;
|
|
1557
|
+
}
|
|
1558
|
+
for (const tag of page.tags) {
|
|
1559
|
+
appendToGroup(groups, tag, page.title);
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
return groups;
|
|
1563
|
+
}
|
|
1564
|
+
function appendToGroup(groups, key, title) {
|
|
1565
|
+
const existing = groups.get(key);
|
|
1566
|
+
if (existing) {
|
|
1567
|
+
existing.push(title);
|
|
1568
|
+
} else {
|
|
1569
|
+
groups.set(key, [title]);
|
|
1570
|
+
}
|
|
1571
|
+
}
|
|
1572
|
+
function buildMOCContent(tagGroups) {
|
|
1573
|
+
const lines = ["# Map of Content", ""];
|
|
1574
|
+
const sortedTags = [...tagGroups.keys()].sort((a, b) => {
|
|
1575
|
+
if (a === "Uncategorized") return 1;
|
|
1576
|
+
if (b === "Uncategorized") return -1;
|
|
1577
|
+
return a.localeCompare(b);
|
|
1578
|
+
});
|
|
1579
|
+
for (const tag of sortedTags) {
|
|
1580
|
+
const titles = tagGroups.get(tag) ?? [];
|
|
1581
|
+
lines.push(`## ${tag}`, "");
|
|
1582
|
+
for (const title of titles.sort()) {
|
|
1583
|
+
lines.push(`- [[${title}]]`);
|
|
1584
|
+
}
|
|
1585
|
+
lines.push("");
|
|
1586
|
+
}
|
|
1587
|
+
return lines.join("\n");
|
|
1588
|
+
}
|
|
1589
|
+
|
|
1590
|
+
// src/utils/embeddings.ts
|
|
1591
|
+
import { readFile as readFile7, readdir as readdir5 } from "fs/promises";
|
|
1592
|
+
import { existsSync as existsSync3 } from "fs";
|
|
1593
|
+
import path13 from "path";
|
|
1594
|
+
function cosineSimilarity(a, b) {
|
|
1595
|
+
if (a.length !== b.length || a.length === 0) return 0;
|
|
1596
|
+
let dot = 0;
|
|
1597
|
+
let magA = 0;
|
|
1598
|
+
let magB = 0;
|
|
1599
|
+
for (let i = 0; i < a.length; i++) {
|
|
1600
|
+
dot += a[i] * b[i];
|
|
1601
|
+
magA += a[i] * a[i];
|
|
1602
|
+
magB += b[i] * b[i];
|
|
1603
|
+
}
|
|
1604
|
+
if (magA === 0 || magB === 0) return 0;
|
|
1605
|
+
return dot / (Math.sqrt(magA) * Math.sqrt(magB));
|
|
1606
|
+
}
|
|
1607
|
+
function findTopK(queryVec, store, k) {
|
|
1608
|
+
const scored = store.entries.map((entry) => ({
|
|
1609
|
+
entry,
|
|
1610
|
+
score: cosineSimilarity(queryVec, entry.vector)
|
|
1611
|
+
}));
|
|
1612
|
+
scored.sort((left, right) => right.score - left.score);
|
|
1613
|
+
return scored.slice(0, k).map((item) => item.entry);
|
|
1614
|
+
}
|
|
1615
|
+
async function readEmbeddingStore(root) {
|
|
1616
|
+
const filePath = path13.join(root, EMBEDDINGS_FILE);
|
|
1617
|
+
if (!existsSync3(filePath)) return null;
|
|
1618
|
+
const raw = await readFile7(filePath, "utf-8");
|
|
1619
|
+
return JSON.parse(raw);
|
|
1620
|
+
}
|
|
1621
|
+
async function writeEmbeddingStore(root, store) {
|
|
1622
|
+
const filePath = path13.join(root, EMBEDDINGS_FILE);
|
|
1623
|
+
await atomicWrite(filePath, JSON.stringify(store, null, 2));
|
|
1624
|
+
}
|
|
1625
|
+
async function findRelevantPages(root, question) {
|
|
1626
|
+
const store = await readEmbeddingStore(root);
|
|
1627
|
+
if (!store || store.entries.length === 0) return [];
|
|
1628
|
+
const activeModel = resolveEmbeddingModel();
|
|
1629
|
+
if (store.model !== activeModel) {
|
|
1630
|
+
warnStaleEmbeddingStore(store.model, activeModel);
|
|
1631
|
+
return [];
|
|
1632
|
+
}
|
|
1633
|
+
const queryVec = await getProvider().embed(question);
|
|
1634
|
+
return findTopK(queryVec, store, EMBEDDING_TOP_K).map((entry) => ({
|
|
1635
|
+
slug: entry.slug,
|
|
1636
|
+
title: entry.title,
|
|
1637
|
+
summary: entry.summary
|
|
1638
|
+
}));
|
|
1639
|
+
}
|
|
1640
|
+
async function collectPageRecords(root) {
|
|
1641
|
+
const records = [];
|
|
1642
|
+
for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
|
|
1643
|
+
const absDir = path13.join(root, dir);
|
|
1644
|
+
let files;
|
|
1645
|
+
try {
|
|
1646
|
+
files = await readdir5(absDir);
|
|
1647
|
+
} catch {
|
|
1648
|
+
continue;
|
|
1649
|
+
}
|
|
1650
|
+
for (const file of files.filter((f) => f.endsWith(".md"))) {
|
|
1651
|
+
const content = await safeReadFile(path13.join(absDir, file));
|
|
1652
|
+
const { meta } = parseFrontmatter(content);
|
|
1653
|
+
if (meta.orphaned || typeof meta.title !== "string") continue;
|
|
1654
|
+
records.push({
|
|
1655
|
+
slug: file.replace(/\.md$/, ""),
|
|
1656
|
+
title: meta.title,
|
|
1657
|
+
summary: typeof meta.summary === "string" ? meta.summary : ""
|
|
1658
|
+
});
|
|
1659
|
+
}
|
|
1660
|
+
}
|
|
1661
|
+
return records;
|
|
1662
|
+
}
|
|
1663
|
+
function buildEmbeddingText(record) {
|
|
1664
|
+
return record.summary ? `${record.title}
|
|
1665
|
+
|
|
1666
|
+
${record.summary}` : record.title;
|
|
1667
|
+
}
|
|
1668
|
+
async function embedPages(records, slugsToEmbed) {
|
|
1669
|
+
const provider = getProvider();
|
|
1670
|
+
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1671
|
+
const fresh = [];
|
|
1672
|
+
for (const record of records) {
|
|
1673
|
+
if (!slugsToEmbed.has(record.slug)) continue;
|
|
1674
|
+
const vector = await provider.embed(buildEmbeddingText(record));
|
|
1675
|
+
fresh.push({
|
|
1676
|
+
slug: record.slug,
|
|
1677
|
+
title: record.title,
|
|
1678
|
+
summary: record.summary,
|
|
1679
|
+
vector,
|
|
1680
|
+
updatedAt: now
|
|
1681
|
+
});
|
|
1682
|
+
}
|
|
1683
|
+
return fresh;
|
|
1684
|
+
}
|
|
1685
|
+
var warnedStaleModels = /* @__PURE__ */ new Set();
|
|
1686
|
+
function warnStaleEmbeddingStore(storedModel, activeModel) {
|
|
1687
|
+
const key = `${storedModel}\u2192${activeModel}`;
|
|
1688
|
+
if (warnedStaleModels.has(key)) return;
|
|
1689
|
+
warnedStaleModels.add(key);
|
|
1690
|
+
status(
|
|
1691
|
+
"!",
|
|
1692
|
+
warn(
|
|
1693
|
+
`Embedding store was built with "${storedModel}" but active embedding model is "${activeModel}". Falling back to full-index selection. Run 'llmwiki compile' to rebuild embeddings.`
|
|
1694
|
+
)
|
|
1695
|
+
);
|
|
1696
|
+
}
|
|
1697
|
+
function resolveEmbeddingModel() {
|
|
1698
|
+
const providerName = getActiveProviderName();
|
|
1699
|
+
const configuredModel = process.env.LLMWIKI_EMBEDDING_MODEL?.trim();
|
|
1700
|
+
if (configuredModel && (providerName === "openai" || providerName === "ollama")) {
|
|
1701
|
+
return configuredModel;
|
|
1702
|
+
}
|
|
1703
|
+
return EMBEDDING_MODELS[providerName] ?? EMBEDDING_MODELS.anthropic;
|
|
1704
|
+
}
|
|
1705
|
+
function mergeEntries(existing, fresh, liveSlugs) {
|
|
1706
|
+
const bySlug = /* @__PURE__ */ new Map();
|
|
1707
|
+
for (const entry of existing) {
|
|
1708
|
+
if (liveSlugs.has(entry.slug)) bySlug.set(entry.slug, entry);
|
|
1709
|
+
}
|
|
1710
|
+
for (const entry of fresh) {
|
|
1711
|
+
bySlug.set(entry.slug, entry);
|
|
1712
|
+
}
|
|
1713
|
+
return Array.from(bySlug.values());
|
|
1714
|
+
}
|
|
1715
|
+
async function updateEmbeddings(root, changedSlugs) {
|
|
1716
|
+
const records = await collectPageRecords(root);
|
|
1717
|
+
const liveSlugs = new Set(records.map((r) => r.slug));
|
|
1718
|
+
const embeddingModel = resolveEmbeddingModel();
|
|
1719
|
+
const existingStore = await readEmbeddingStore(root);
|
|
1720
|
+
const modelChanged = Boolean(existingStore && existingStore.model !== embeddingModel);
|
|
1721
|
+
const toEmbed = new Set(changedSlugs.filter((slug) => liveSlugs.has(slug)));
|
|
1722
|
+
const previousEntries = modelChanged ? [] : existingStore?.entries ?? [];
|
|
1723
|
+
if (!existingStore || modelChanged) {
|
|
1724
|
+
for (const record of records) toEmbed.add(record.slug);
|
|
1725
|
+
}
|
|
1726
|
+
if (!modelChanged && toEmbed.size === 0 && previousEntries.every((e) => liveSlugs.has(e.slug))) {
|
|
1727
|
+
return;
|
|
1728
|
+
}
|
|
1729
|
+
const freshEntries = await embedPages(records, toEmbed);
|
|
1730
|
+
const mergedEntries = mergeEntries(previousEntries, freshEntries, liveSlugs);
|
|
1731
|
+
const dimensions = mergedEntries[0]?.vector.length ?? 0;
|
|
1732
|
+
const store = {
|
|
1733
|
+
version: 1,
|
|
1734
|
+
model: embeddingModel,
|
|
1735
|
+
dimensions,
|
|
1736
|
+
entries: mergedEntries
|
|
1737
|
+
};
|
|
1738
|
+
await writeEmbeddingStore(root, store);
|
|
1739
|
+
status("*", dim(`Embeddings updated (${mergedEntries.length} pages).`));
|
|
1740
|
+
}
|
|
1741
|
+
|
|
1742
|
+
// src/compiler/candidates.ts
|
|
1743
|
+
import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
|
|
1744
|
+
import { existsSync as existsSync4 } from "fs";
|
|
1745
|
+
import path14 from "path";
|
|
1746
|
+
import { randomBytes } from "crypto";
|
|
1747
|
+
var ID_SUFFIX_BYTES = 4;
|
|
1748
|
+
var CANDIDATE_EXT = ".json";
|
|
1749
|
+
function buildCandidateId(slug) {
|
|
1750
|
+
const suffix = randomBytes(ID_SUFFIX_BYTES).toString("hex");
|
|
1751
|
+
return `${slug}-${suffix}`;
|
|
1752
|
+
}
|
|
1753
|
+
function candidatePath(root, id) {
|
|
1754
|
+
return path14.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
|
|
1755
|
+
}
|
|
1756
|
+
/**
 * Resolve the on-disk location of an archived candidate file.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id (file name without extension).
 * @returns {string} Path of `<id><CANDIDATE_EXT>` inside CANDIDATES_ARCHIVE_DIR under root.
 */
function archivePath(root, id) {
  const fileName = `${id}${CANDIDATE_EXT}`;
  return path14.join(root, CANDIDATES_ARCHIVE_DIR, fileName);
}
|
|
1759
|
+
/**
 * Turn a draft into a candidate record, write it as pretty-printed JSON
 * via atomicWrite, and return the record.
 *
 * @param {string} root - Project root directory.
 * @param {object} draft - Draft with title, slug, summary, sources, body and optional sourceStates.
 * @returns {Promise<object>} The candidate that was written (with generated id and generatedAt).
 */
async function writeCandidate(root, draft) {
  const { title, slug, summary, sources, body, sourceStates } = draft;
  const candidate = {
    id: buildCandidateId(slug),
    title,
    slug,
    summary,
    sources,
    body,
    generatedAt: (/* @__PURE__ */ new Date()).toISOString()
  };
  // sourceStates is optional: the key is only present when the draft supplied a truthy value.
  if (sourceStates) {
    candidate.sourceStates = sourceStates;
  }
  const serialized = JSON.stringify(candidate, null, 2);
  await atomicWrite(candidatePath(root, candidate.id), serialized);
  return candidate;
}
|
|
1773
|
+
/**
 * Report an error line, flag the process as failed, and hand back null so
 * callers can `return failWithError(...)` directly.
 *
 * @param {string} message - Text to report.
 * @returns {null} Always null.
 */
function failWithError(message) {
  const formatted = error(message);
  status("!", formatted);
  // Only the exit code is set; the process is not terminated here.
  process.exitCode = 1;
  return null;
}
|
|
1778
|
+
/**
 * Read a candidate by id, reporting a "not found" failure when it cannot be loaded.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The candidate, or null after reporting the failure.
 */
async function loadCandidateOrFail(root, id) {
  const found = await readCandidate(root, id);
  if (found) {
    return found;
  }
  return failWithError(`Candidate not found: ${id}`);
}
|
|
1783
|
+
/**
 * Read a candidate by id; when it cannot be loaded, report that it was
 * removed by another process during review.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The candidate, or null after reporting the failure.
 */
async function loadCandidateUnderLockOrFail(root, id) {
  const found = await readCandidate(root, id);
  return found
    ? found
    : failWithError(`Candidate ${id} was removed by another process during review.`);
}
|
|
1790
|
+
/**
 * Load and validate a candidate file.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The parsed candidate, or null when the file
 *   yields no content, is not valid JSON, or fails isValidCandidate.
 */
async function readCandidate(root, id) {
  const text = await safeReadFile(candidatePath(root, id));
  if (!text) {
    return null;
  }
  try {
    const data = JSON.parse(text);
    return isValidCandidate(data) ? data : null;
  } catch {
    // Malformed JSON is treated the same as a missing candidate.
    return null;
  }
}
|
|
1801
|
+
/**
 * Structural check for a parsed candidate record.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {boolean} True when value is an object whose id, title, slug and
 *   body are strings and whose sources is an array.
 */
function isValidCandidate(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  const requiredStrings = ["id", "title", "slug", "body"];
  const stringsOk = requiredStrings.every((key) => typeof value[key] === "string");
  return stringsOk && Array.isArray(value.sources);
}
|
|
1806
|
+
/**
 * List every readable, valid candidate in the candidates directory,
 * ordered by generatedAt ascending.
 *
 * Only regular files ending in CANDIDATE_EXT are considered; files that
 * readCandidate rejects are skipped.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} Candidates sorted by generatedAt; empty when
 *   the directory does not exist.
 */
async function listCandidates(root) {
  const dir = path14.join(root, CANDIDATES_DIR);
  if (!existsSync4(dir)) return [];
  const entries = await readdir6(dir, { withFileTypes: true });
  const candidates = [];
  for (const entry of entries) {
    if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
    const id = entry.name.slice(0, -CANDIDATE_EXT.length);
    const candidate = await readCandidate(root, id);
    if (candidate) candidates.push(candidate);
  }
  // Candidate validation does not require generatedAt, so a hand-edited or
  // older file may lack it. Treat a missing/non-string timestamp as "" (sorts
  // first) instead of letting localeCompare throw and abort the listing.
  const sortKey = (candidate) => typeof candidate.generatedAt === "string" ? candidate.generatedAt : "";
  candidates.sort((a, b) => sortKey(a).localeCompare(sortKey(b)));
  return candidates;
}
|
|
1820
|
+
/**
 * Count the candidates returned by listCandidates.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<number>} Number of listed candidates.
 */
async function countCandidates(root) {
  const pending = await listCandidates(root);
  const total = pending.length;
  return total;
}
|
|
1824
|
+
/**
 * Delete a pending candidate file.
 *
 * The unlink is attempted directly rather than after an existence check, so a
 * file removed by a concurrent process is reported as "not deleted" instead of
 * surfacing an ENOENT error.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<boolean>} True when the file was removed, false when it did not exist.
 * @throws Any unlink failure other than ENOENT.
 */
async function deleteCandidate(root, id) {
  const filePath = candidatePath(root, id);
  try {
    await unlink2(filePath);
    return true;
  } catch (err) {
    if (err && err.code === "ENOENT") return false;
    throw err;
  }
}
|
|
1830
|
+
/**
 * Move a pending candidate file into the archive directory.
 *
 * A rename is tried first. If it fails, the file is copied by reading and
 * rewriting it, then the source is removed.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<boolean>} True when archived, false when the candidate file does not exist.
 * @throws The original rename error when the fallback cannot read the source,
 *   or any write/unlink error raised by the fallback.
 */
async function archiveCandidate(root, id) {
  const sourcePath = candidatePath(root, id);
  if (!existsSync4(sourcePath)) return false;
  const target = archivePath(root, id);
  await mkdir5(path14.dirname(target), { recursive: true });
  try {
    await rename3(sourcePath, target);
  } catch (renameError) {
    const raw = await safeReadFile(sourcePath);
    if (!raw) {
      // safeReadFile yields "" when the read fails. Writing that out and then
      // unlinking the source would replace the candidate with an empty archive
      // file, so stop here instead.
      if (!existsSync4(sourcePath)) return false; // removed concurrently: nothing left to archive
      throw renameError;
    }
    await writeFile4(target, raw, "utf-8");
    await unlink2(sourcePath);
  }
  return true;
}
|
|
1844
|
+
|
|
1845
|
+
// src/compiler/page-renderer.ts
|
|
1846
|
+
import { readdir as readdir7 } from "fs/promises";
|
|
1847
|
+
import path15 from "path";
|
|
1848
|
+
|
|
1849
|
+
// src/compiler/provenance.ts
|
|
1850
|
+
/**
 * Copy provenance metadata from a concept onto a frontmatter field object.
 * Mutates `fields`; each key is only set when the concept carries a usable value.
 *
 * @param {object} fields - Frontmatter fields being assembled (mutated).
 * @param {object} concept - Concept that may carry confidence, provenanceState,
 *   contradictedBy and inferredParagraphs.
 * @returns {void}
 */
function addProvenanceMeta(fields, concept) {
  const { confidence, provenanceState, contradictedBy, inferredParagraphs } = concept;
  // typeof check (not truthiness) so a confidence of 0 is still recorded.
  if (typeof confidence === "number") fields.confidence = confidence;
  if (provenanceState) fields.provenanceState = provenanceState;
  if (contradictedBy?.length > 0) fields.contradictedBy = contradictedBy;
  if (typeof inferredParagraphs === "number") fields.inferredParagraphs = inferredParagraphs;
}
|
|
1864
|
+
/**
 * Emit one warning line naming the slugs a concept is contradicted by.
 * Does nothing when the concept has no contradiction references.
 *
 * @param {string} conceptTitle - Title shown in the warning.
 * @param {object} concept - Concept whose contradictedBy entries expose a slug.
 * @returns {void}
 */
function reportContradictionWarnings(conceptTitle, concept) {
  const { contradictedBy } = concept;
  const hasRefs = Boolean(contradictedBy) && contradictedBy.length !== 0;
  if (!hasRefs) {
    return;
  }
  const slugList = contradictedBy.map((ref) => ref.slug).join(", ");
  const message = warn(`Contradiction reported on "${conceptTitle}" \u2014 conflicts with: ${slugList}`);
  status("!", message);
}
|
|
1873
|
+
|
|
1874
|
+
// src/compiler/page-renderer.ts
|
|
1875
|
+
var RELATED_PAGE_CONTEXT_LIMIT = 5;
|
|
1876
|
+
/**
 * Produce the full text (frontmatter + body) of a concept page for a merged entry.
 *
 * Reads the current page (if any) and related pages, builds the page prompt,
 * asks the model for the body, then prepends freshly built frontmatter.
 * Contradiction warnings for the concept are reported before returning.
 *
 * @param {string} root - Project root directory.
 * @param {object} entry - Merged entry with slug, concept, combinedContent and sourceFiles.
 * @returns {Promise<string>} Frontmatter, a blank line, the body, and a trailing newline.
 */
async function renderMergedPageContent(root, entry) {
  const { slug, concept, combinedContent } = entry;
  const conceptTitle = concept.concept;
  const existingPage = await safeReadFile(path15.join(root, CONCEPTS_DIR, `${slug}.md`));
  const relatedPages = await loadRelatedPages(root, slug);
  const system = buildPagePrompt(conceptTitle, combinedContent, existingPage, relatedPages);
  const request = { role: "user", content: `Write the wiki page for "${conceptTitle}".` };
  const pageBody = await callClaude({ system, messages: [request] });
  const frontmatter = buildMergedFrontmatter(entry, existingPage);
  reportContradictionWarnings(conceptTitle, concept);
  return `${frontmatter}\n\n${pageBody}\n`;
}
|
|
1899
|
+
/**
 * Build the frontmatter block for a merged concept page.
 *
 * createdAt is carried over from the existing page when that page's
 * frontmatter holds a non-empty string for it; otherwise the current time is
 * used. updatedAt is always the current time.
 *
 * @param {object} entry - Merged entry with concept and sourceFiles.
 * @param {string} existingPage - Current page text, or "" when there is none.
 * @returns {*} Whatever buildFrontmatter returns for the assembled fields.
 */
function buildMergedFrontmatter(entry, existingPage) {
  const now = (/* @__PURE__ */ new Date()).toISOString();
  const { concept, sourceFiles } = entry;
  const existing = existingPage ? parseFrontmatter(existingPage) : null;
  const priorCreatedAt = existing?.meta.createdAt;
  const hasPriorCreatedAt = Boolean(priorCreatedAt) && typeof priorCreatedAt === "string";
  const frontmatterFields = {
    title: concept.concept,
    summary: concept.summary,
    sources: sourceFiles,
    createdAt: hasPriorCreatedAt ? priorCreatedAt : now,
    updatedAt: now
  };
  addObsidianMeta(frontmatterFields, concept.concept, concept.tags ?? []);
  addProvenanceMeta(frontmatterFields, concept);
  return buildFrontmatter(frontmatterFields);
}
|
|
1914
|
+
/**
 * Collect the text of up to RELATED_PAGE_CONTEXT_LIMIT other concept pages,
 * joined with a `---` separator.
 *
 * The limit is applied to pages that are actually usable (non-empty and not
 * marked orphaned), so skipped pages no longer consume context slots. File
 * names are sorted so the selection does not depend on directory listing order.
 *
 * @param {string} root - Project root directory.
 * @param {string} excludeSlug - Slug of the page being written; its own file is skipped.
 * @returns {Promise<string>} Joined page contents, or "" when the concepts
 *   directory cannot be read or no page qualifies.
 */
async function loadRelatedPages(root, excludeSlug) {
  const conceptsPath = path15.join(root, CONCEPTS_DIR);
  let files;
  try {
    files = await readdir7(conceptsPath);
  } catch {
    return "";
  }
  const ownFile = `${excludeSlug}.md`;
  const pageFiles = files.filter((f) => f.endsWith(".md") && f !== ownFile).sort();
  const contents = [];
  for (const f of pageFiles) {
    if (contents.length >= RELATED_PAGE_CONTEXT_LIMIT) break;
    const content = await safeReadFile(path15.join(conceptsPath, f));
    if (!content) continue;
    const { meta } = parseFrontmatter(content);
    if (meta.orphaned) continue;
    contents.push(content);
  }
  return contents.join("\n\n---\n\n");
}
|
|
1933
|
+
|
|
936
1934
|
// src/compiler/index.ts
|
|
937
1935
|
import pLimit from "p-limit";
|
|
938
|
-
|
|
1936
|
+
/**
 * Create a compile result with zeroed counters and fresh empty lists.
 *
 * @returns {{compiled: number, skipped: number, deleted: number, concepts: Array, pages: Array, errors: Array}}
 *   A new object on every call; the arrays are not shared between calls.
 */
function emptyCompileResult() {
  const counters = { compiled: 0, skipped: 0, deleted: 0 };
  const collections = { concepts: [], pages: [], errors: [] };
  return { ...counters, ...collections };
}
|
|
1939
|
+
/**
 * Run a compile and discard the structured result that compileAndReport returns.
 *
 * @param {string} root - Project root directory.
 * @param {object} [options={}] - Options forwarded unchanged to compileAndReport.
 * @returns {Promise<void>}
 */
async function compile(root, options = {}) {
  const pendingReport = compileAndReport(root, options);
  await pendingReport;
}
|
|
1942
|
+
async function compileAndReport(root, options = {}) {
|
|
939
1943
|
header("llmwiki compile");
|
|
940
1944
|
const locked = await acquireLock(root);
|
|
941
1945
|
if (!locked) {
|
|
942
1946
|
status("!", error("Could not acquire lock. Try again later."));
|
|
943
|
-
return
|
|
1947
|
+
return {
|
|
1948
|
+
...emptyCompileResult(),
|
|
1949
|
+
errors: ["Could not acquire .llmwiki/lock \u2014 another compile is in progress."]
|
|
1950
|
+
};
|
|
944
1951
|
}
|
|
945
1952
|
try {
|
|
946
|
-
await runCompilePipeline(root);
|
|
1953
|
+
return await runCompilePipeline(root, options);
|
|
947
1954
|
} finally {
|
|
948
1955
|
await releaseLock(root);
|
|
949
1956
|
}
|
|
950
1957
|
}
|
|
951
|
-
|
|
1958
|
+
/**
 * Split change records into buckets by status, preserving input order.
 *
 * @param {Array<{status: string}>} changes - Detected changes.
 * @returns {{toCompile: Array, deleted: Array, unchanged: Array}} "new" and
 *   "changed" go to toCompile; any other status besides "deleted" and
 *   "unchanged" is dropped.
 */
function bucketChanges(changes) {
  const toCompile = [];
  const deleted = [];
  const unchanged = [];
  changes.forEach((change) => {
    switch (change.status) {
      case "new":
      case "changed":
        toCompile.push(change);
        break;
      case "deleted":
        deleted.push(change);
        break;
      case "unchanged":
        unchanged.push(change);
        break;
      default:
        break;
    }
  });
  return { toCompile, deleted, unchanged };
}
|
|
1965
|
+
/**
 * Merge extractions per slug and generate a page (or review candidate) for
 * each merged entry, running at most COMPILE_CONCURRENCY generations at once.
 *
 * @param {string} root - Project root directory.
 * @param {Array} extractions - Per-source extraction results.
 * @param {Set<string>} frozenSlugs - Slugs passed through to mergeExtractions.
 * @param {object} options - Compile options; `review` switches on source-state collection.
 * @returns {Promise<{pages: Array, errors: Array, candidates: Array}>} The
 *   merged entries, plus any error messages and candidate ids reported by
 *   generateMergedPage.
 */
async function generatePagesPhase(root, extractions, frozenSlugs, options) {
  const merged = mergeExtractions(extractions, frozenSlugs);
  let sourceStates = {};
  if (options.review) {
    sourceStates = await buildExtractionSourceStates(root, extractions);
  }
  const limit = pLimit(COMPILE_CONCURRENCY);
  const errors = [];
  const candidates = [];
  const generateOne = async (entry) => {
    const outcome = await generateMergedPage(root, entry, options, sourceStates);
    if (outcome.error) errors.push(outcome.error);
    if (outcome.candidateId) candidates.push(outcome.candidateId);
    return entry;
  };
  const tasks = merged.map((entry) => limit(() => generateOne(entry)));
  const pages = await Promise.all(tasks);
  return { pages, errors, candidates };
}
|
|
1981
|
+
/**
 * Persist source state for every extraction that produced at least one
 * concept, one source at a time. Extractions with no concepts are skipped.
 *
 * @param {string} root - Project root directory.
 * @param {Array} extractions - Results carrying sourcePath, sourceFile and concepts.
 * @returns {Promise<void>}
 */
async function persistExtractionStates(root, extractions) {
  for (const extraction of extractions) {
    const { concepts } = extraction;
    if (concepts.length !== 0) {
      await persistSourceState(root, extraction.sourcePath, extraction.sourceFile, concepts);
    }
  }
}
|
|
1987
|
+
/**
 * Print the end-of-compile summary and build the structured compile result.
 *
 * @param {{toCompile: Array, deleted: Array, unchanged: Array}} buckets - Bucketed changes.
 * @param {{pages: Array, errors: Array, candidates: Array}} generation - Output of the page phase.
 * @param {Array} extractions - Extraction results; each one without concepts adds an error line.
 * @param {object} options - Compile options; with `review` the result also carries `candidates`.
 * @returns {object} Counts, concept titles, page slugs and collected errors.
 */
function summarizeCompile(buckets, generation, extractions, options) {
  header("Compilation complete");
  const compiledCount = buckets.toCompile.length;
  const skippedCount = buckets.unchanged.length;
  const deletedCount = buckets.deleted.length;
  status("\u2713", success(
    `${compiledCount} compiled, ${skippedCount} skipped, ${deletedCount} deleted`
  ));
  const awaitingReview = options.review && generation.candidates.length > 0;
  if (awaitingReview) {
    status("?", info(
      `${generation.candidates.length} candidate(s) awaiting review \u2014 run \`llmwiki review list\``
    ));
  } else if (compiledCount > 0) {
    status("\u2192", dim('Next: llmwiki query "your question here"'));
  }
  const emptyExtractionErrors = extractions
    .filter((result) => result.concepts.length === 0)
    .map((result) => `No concepts extracted from ${result.sourceFile}`);
  const summary = {
    compiled: compiledCount,
    skipped: skippedCount,
    deleted: deletedCount,
    concepts: generation.pages.map((entry) => entry.concept.concept),
    pages: generation.pages.map((entry) => entry.slug),
    errors: [...generation.errors, ...emptyExtractionErrors]
  };
  if (options.review) {
    summary.candidates = generation.candidates;
  }
  return summary;
}
|
|
2018
|
+
async function runCompilePipeline(root, options) {
|
|
952
2019
|
const state = await readState(root);
|
|
953
2020
|
const changes = await detectChanges(root, state);
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
changes.push({ file, status: "changed" });
|
|
958
|
-
}
|
|
959
|
-
const toCompile = changes.filter((c) => c.status === "new" || c.status === "changed");
|
|
960
|
-
const deleted = changes.filter((c) => c.status === "deleted");
|
|
961
|
-
const unchanged = changes.filter((c) => c.status === "unchanged");
|
|
962
|
-
if (toCompile.length === 0 && deleted.length === 0) {
|
|
2021
|
+
augmentWithAffectedSources(changes, findAffectedSources(state, changes));
|
|
2022
|
+
const buckets = bucketChanges(changes);
|
|
2023
|
+
if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
|
|
963
2024
|
status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
|
|
964
|
-
return;
|
|
2025
|
+
return { ...emptyCompileResult(), skipped: buckets.unchanged.length };
|
|
965
2026
|
}
|
|
966
2027
|
printChangesSummary(changes);
|
|
2028
|
+
if (!options.review) {
|
|
2029
|
+
await markDeletedAsOrphaned(root, buckets.deleted, state);
|
|
2030
|
+
}
|
|
2031
|
+
const frozenSlugs = findFrozenSlugs(state, changes);
|
|
2032
|
+
reportFrozenSlugs(frozenSlugs);
|
|
2033
|
+
const extractions = await runExtractionPhases(root, buckets.toCompile, state, changes);
|
|
2034
|
+
if (!options.review) {
|
|
2035
|
+
await freezeFailedExtractions(root, extractions, frozenSlugs);
|
|
2036
|
+
}
|
|
2037
|
+
const generation = await generatePagesPhase(root, extractions, frozenSlugs, options);
|
|
2038
|
+
if (!options.review) {
|
|
2039
|
+
await persistExtractionStates(root, extractions);
|
|
2040
|
+
if (frozenSlugs.size > 0) {
|
|
2041
|
+
await orphanUnownedFrozenPages(root, frozenSlugs);
|
|
2042
|
+
}
|
|
2043
|
+
await persistFrozenSlugs(root, frozenSlugs, extractions);
|
|
2044
|
+
await finalizeWiki(root, generation.pages);
|
|
2045
|
+
}
|
|
2046
|
+
return summarizeCompile(buckets, generation, extractions, options);
|
|
2047
|
+
}
|
|
2048
|
+
/**
 * Append a "changed" record to `changes` for every affected source file and
 * print a notice for each. Mutates `changes` in place.
 *
 * @param {Array<{file: string, status: string}>} changes - Change list to extend.
 * @param {Iterable<string>} affected - Source files affected by a shared concept.
 * @returns {void}
 */
function augmentWithAffectedSources(changes, affected) {
  for (const affectedFile of affected) {
    const notice = info(`${affectedFile} [affected by shared concept]`);
    status("~", notice);
    const record = { file: affectedFile, status: "changed" };
    changes.push(record);
  }
}
|
|
2054
|
+
/**
 * Call markOrphaned for each deleted source, one at a time and in order.
 *
 * @param {string} root - Project root directory.
 * @param {Array<{file: string}>} deleted - Change records for deleted sources.
 * @param {object} state - Compile state forwarded to markOrphaned.
 * @returns {Promise<void>}
 */
async function markDeletedAsOrphaned(root, deleted, state) {
  for (const removedSource of deleted) {
    const { file } = removedSource;
    await markOrphaned(root, file, state);
  }
}
|
|
2059
|
+
/**
 * Print one informational line per frozen slug.
 *
 * @param {Iterable<string>} frozenSlugs - Slugs to report.
 * @returns {void}
 */
function reportFrozenSlugs(frozenSlugs) {
  for (const frozenSlug of frozenSlugs) {
    const note = dim(`Frozen: ${frozenSlug} (shared with deleted source)`);
    status("i", note);
  }
}
|
|
2064
|
+
async function runExtractionPhases(root, toCompile, state, allChanges) {
|
|
974
2065
|
const extractions = [];
|
|
975
2066
|
for (const change of toCompile) {
|
|
976
2067
|
extractions.push(await extractForSource(root, change.file));
|
|
977
2068
|
}
|
|
978
|
-
const lateAffected = findLateAffectedSources(extractions, state,
|
|
2069
|
+
const lateAffected = findLateAffectedSources(extractions, state, allChanges);
|
|
979
2070
|
for (const file of lateAffected) {
|
|
980
2071
|
status("~", info(`${file} [shares concept with new source]`));
|
|
981
2072
|
extractions.push(await extractForSource(root, file));
|
|
982
2073
|
}
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
const
|
|
987
|
-
|
|
988
|
-
await generateMergedPage(root, entry);
|
|
989
|
-
return entry;
|
|
990
|
-
}))
|
|
991
|
-
);
|
|
992
|
-
const allChangedSlugs = pageResults.map((e) => e.slug);
|
|
993
|
-
const allNewSlugs = pageResults.filter((e) => e.concept.is_new).map((e) => e.slug);
|
|
994
|
-
for (const result of extractions) {
|
|
995
|
-
if (result.concepts.length === 0) continue;
|
|
996
|
-
await persistSourceState(root, result.sourcePath, result.sourceFile, result.concepts);
|
|
997
|
-
}
|
|
998
|
-
if (frozenSlugs.size > 0) {
|
|
999
|
-
await orphanUnownedFrozenPages(root, frozenSlugs);
|
|
1000
|
-
}
|
|
1001
|
-
await persistFrozenSlugs(root, frozenSlugs, extractions);
|
|
2074
|
+
return extractions;
|
|
2075
|
+
}
|
|
2076
|
+
async function finalizeWiki(root, pages) {
|
|
2077
|
+
const allChangedSlugs = pages.map((entry) => entry.slug);
|
|
2078
|
+
const allNewSlugs = pages.filter((entry) => entry.concept.is_new).map((entry) => entry.slug);
|
|
1002
2079
|
if (allChangedSlugs.length > 0) {
|
|
1003
2080
|
status("\u{1F517}", info("Resolving interlinks..."));
|
|
1004
2081
|
await resolveLinks(root, allChangedSlugs, allNewSlugs);
|
|
1005
2082
|
}
|
|
1006
2083
|
await generateIndex(root);
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
`${toCompile.length} compiled, ${unchanged.length} skipped, ${deleted.length} deleted`
|
|
1010
|
-
));
|
|
1011
|
-
if (toCompile.length > 0) {
|
|
1012
|
-
status("\u2192", dim('Next: llmwiki query "your question here"'));
|
|
1013
|
-
}
|
|
2084
|
+
await generateMOC(root);
|
|
2085
|
+
await safelyUpdateEmbeddings(root, allChangedSlugs);
|
|
1014
2086
|
}
|
|
1015
2087
|
function printChangesSummary(changes) {
|
|
1016
2088
|
const iconMap = {
|
|
@@ -1033,9 +2105,9 @@ function printChangesSummary(changes) {
|
|
|
1033
2105
|
}
|
|
1034
2106
|
async function extractForSource(root, sourceFile) {
|
|
1035
2107
|
status("*", info(`Extracting: ${sourceFile}`));
|
|
1036
|
-
const sourcePath =
|
|
1037
|
-
const sourceContent = await
|
|
1038
|
-
const existingIndex = await safeReadFile(
|
|
2108
|
+
const sourcePath = path16.join(root, SOURCES_DIR, sourceFile);
|
|
2109
|
+
const sourceContent = await readFile8(sourcePath, "utf-8");
|
|
2110
|
+
const existingIndex = await safeReadFile(path16.join(root, INDEX_FILE));
|
|
1039
2111
|
const concepts = await extractConcepts(sourceContent, existingIndex);
|
|
1040
2112
|
if (concepts.length > 0) {
|
|
1041
2113
|
const names = concepts.map((c) => c.concept).join(", ");
|
|
@@ -1043,6 +2115,26 @@ async function extractForSource(root, sourceFile) {
|
|
|
1043
2115
|
}
|
|
1044
2116
|
return { sourceFile, sourcePath, sourceContent, concepts };
|
|
1045
2117
|
}
|
|
2118
|
+
/**
 * Combine the provenance metadata of two extractions of the same concept.
 *
 * Returns a shallow copy of `existing` where:
 *  - confidence is the lower of the two when both are numbers, or the incoming
 *    one when only it is a number;
 *  - provenanceState is always "merged";
 *  - contradictedBy keeps the existing references and appends incoming ones
 *    whose slug has not been seen yet (undefined when the result is empty);
 *  - inferredParagraphs is the higher of the two when both are numbers, or the
 *    incoming one when only it is a number.
 *
 * @param {object} existing - Concept already recorded for the slug (not mutated).
 * @param {object} incoming - Concept from another source.
 * @returns {object} The reconciled concept.
 */
function reconcileConceptMetadata(existing, incoming) {
  const isNumber = (value) => typeof value === "number";
  const reconciled = { ...existing };
  if (isNumber(incoming.confidence)) {
    reconciled.confidence = isNumber(existing.confidence)
      ? Math.min(existing.confidence, incoming.confidence)
      : incoming.confidence;
  }
  reconciled.provenanceState = "merged";
  const priorRefs = existing.contradictedBy ?? [];
  const knownSlugs = new Set(priorRefs.map((ref) => ref.slug));
  const addedRefs = (incoming.contradictedBy ?? []).filter((ref) => {
    if (knownSlugs.has(ref.slug)) return false;
    knownSlugs.add(ref.slug);
    return true;
  });
  const allRefs = [...priorRefs, ...addedRefs];
  reconciled.contradictedBy = allRefs.length > 0 ? allRefs : void 0;
  if (isNumber(incoming.inferredParagraphs)) {
    reconciled.inferredParagraphs = isNumber(existing.inferredParagraphs)
      ? Math.max(existing.inferredParagraphs, incoming.inferredParagraphs)
      : incoming.inferredParagraphs;
  }
  return reconciled;
}
|
|
1046
2138
|
function mergeExtractions(extractions, frozenSlugs) {
|
|
1047
2139
|
const bySlug = /* @__PURE__ */ new Map();
|
|
1048
2140
|
for (const result of extractions) {
|
|
@@ -1052,6 +2144,7 @@ function mergeExtractions(extractions, frozenSlugs) {
|
|
|
1052
2144
|
if (frozenSlugs.has(slug)) continue;
|
|
1053
2145
|
const existing = bySlug.get(slug);
|
|
1054
2146
|
if (existing) {
|
|
2147
|
+
existing.concept = reconcileConceptMetadata(existing.concept, concept);
|
|
1055
2148
|
existing.sourceFiles.push(result.sourceFile);
|
|
1056
2149
|
existing.combinedContent += `
|
|
1057
2150
|
|
|
@@ -1072,37 +2165,26 @@ ${result.sourceContent}`
|
|
|
1072
2165
|
}
|
|
1073
2166
|
return Array.from(bySlug.values());
|
|
1074
2167
|
}
|
|
1075
|
-
async function generateMergedPage(root, entry) {
|
|
1076
|
-
const
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
const
|
|
1086
|
-
system,
|
|
1087
|
-
messages: [
|
|
1088
|
-
{ role: "user", content: `Write the wiki page for "${entry.concept.concept}".` }
|
|
1089
|
-
]
|
|
1090
|
-
});
|
|
1091
|
-
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1092
|
-
const existing = existingPage ? parseFrontmatter(existingPage) : null;
|
|
1093
|
-
const createdAt = existing?.meta.createdAt && typeof existing.meta.createdAt === "string" ? existing.meta.createdAt : now;
|
|
1094
|
-
const frontmatter = buildFrontmatter({
|
|
2168
|
+
/**
 * Render the page for a merged entry and either store it as a review
 * candidate (review mode) or write it to the concepts directory.
 *
 * @param {string} root - Project root directory.
 * @param {object} entry - Merged entry with slug and concept.
 * @param {object} options - Compile options; `review` selects candidate mode.
 * @param {object} sourceStates - Source states forwarded to persistReviewCandidate.
 * @returns {Promise<object>} In review mode, whatever persistReviewCandidate
 *   returns; otherwise `{ error }`, where error is undefined when
 *   writePageIfValid reported none.
 */
async function generateMergedPage(root, entry, options, sourceStates) {
  const fullPage = await renderMergedPageContent(root, entry);
  if (!options.review) {
    const pagePath = path16.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
    const writeError = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
    return { error: writeError ?? void 0 };
  }
  return await persistReviewCandidate(root, entry, fullPage, sourceStates);
}
|
|
2177
|
+
async function persistReviewCandidate(root, entry, fullPage, sourceStates) {
|
|
2178
|
+
const candidate = await writeCandidate(root, {
|
|
1095
2179
|
title: entry.concept.concept,
|
|
2180
|
+
slug: entry.slug,
|
|
1096
2181
|
summary: entry.concept.summary,
|
|
1097
2182
|
sources: entry.sourceFiles,
|
|
1098
|
-
|
|
1099
|
-
|
|
2183
|
+
body: fullPage,
|
|
2184
|
+
sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles)
|
|
1100
2185
|
});
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
${pageBody}
|
|
1104
|
-
`;
|
|
1105
|
-
await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
2186
|
+
status("?", info(`Candidate ready: ${candidate.id} (${entry.slug})`));
|
|
2187
|
+
return { candidateId: candidate.id };
|
|
1106
2188
|
}
|
|
1107
2189
|
async function extractConcepts(sourceContent, existingIndex) {
|
|
1108
2190
|
const system = buildExtractionPrompt(sourceContent, existingIndex);
|
|
@@ -1113,31 +2195,21 @@ async function extractConcepts(sourceContent, existingIndex) {
|
|
|
1113
2195
|
});
|
|
1114
2196
|
return parseConcepts(rawOutput);
|
|
1115
2197
|
}
|
|
1116
|
-
async function loadRelatedPages(root, excludeSlug) {
|
|
1117
|
-
const conceptsPath = path10.join(root, CONCEPTS_DIR);
|
|
1118
|
-
let files;
|
|
1119
|
-
try {
|
|
1120
|
-
files = await readdir4(conceptsPath);
|
|
1121
|
-
} catch {
|
|
1122
|
-
return "";
|
|
1123
|
-
}
|
|
1124
|
-
const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, 5);
|
|
1125
|
-
const contents = [];
|
|
1126
|
-
for (const f of related) {
|
|
1127
|
-
const content = await safeReadFile(path10.join(conceptsPath, f));
|
|
1128
|
-
if (!content) continue;
|
|
1129
|
-
const { meta } = parseFrontmatter(content);
|
|
1130
|
-
if (meta.orphaned) continue;
|
|
1131
|
-
contents.push(content);
|
|
1132
|
-
}
|
|
1133
|
-
return contents.join("\n\n---\n\n");
|
|
1134
|
-
}
|
|
1135
2198
|
async function writePageIfValid(pagePath, content, conceptTitle) {
|
|
1136
2199
|
if (!validateWikiPage(content)) {
|
|
1137
2200
|
status("!", warn(`Invalid page for "${conceptTitle}" \u2014 skipped.`));
|
|
1138
|
-
return
|
|
2201
|
+
return `Invalid page for "${conceptTitle}" \u2014 failed validation`;
|
|
1139
2202
|
}
|
|
1140
2203
|
await atomicWrite(pagePath, content);
|
|
2204
|
+
return null;
|
|
2205
|
+
}
|
|
2206
|
+
/**
 * Best-effort embeddings refresh: any failure from updateEmbeddings is
 * reported as a warning instead of being propagated.
 *
 * @param {string} root - Project root directory.
 * @param {string[]} changedSlugs - Slugs forwarded to updateEmbeddings.
 * @returns {Promise<void>}
 */
async function safelyUpdateEmbeddings(root, changedSlugs) {
  try {
    await updateEmbeddings(root, changedSlugs);
  } catch (err) {
    let reason;
    if (err instanceof Error) {
      reason = err.message;
    } else {
      reason = String(err);
    }
    status("!", warn(`Skipped embeddings update: ${reason}`));
  }
}
|
|
1142
2214
|
async function persistSourceState(root, sourcePath, sourceFile, concepts) {
|
|
1143
2215
|
const hash = await hashFile(sourcePath);
|
|
@@ -1150,20 +2222,20 @@ async function persistSourceState(root, sourcePath, sourceFile, concepts) {
|
|
|
1150
2222
|
}
|
|
1151
2223
|
|
|
1152
2224
|
// src/commands/compile.ts
|
|
1153
|
-
async function compileCommand() {
|
|
1154
|
-
if (!
|
|
2225
|
+
async function compileCommand(options = {}) {
|
|
2226
|
+
if (!existsSync5(SOURCES_DIR)) {
|
|
1155
2227
|
status(
|
|
1156
2228
|
"!",
|
|
1157
2229
|
warn("No sources found. Run `llmwiki ingest <url>` first.")
|
|
1158
2230
|
);
|
|
1159
2231
|
return;
|
|
1160
2232
|
}
|
|
1161
|
-
await compile(process.cwd());
|
|
2233
|
+
await compile(process.cwd(), options);
|
|
1162
2234
|
}
|
|
1163
2235
|
|
|
1164
2236
|
// src/commands/query.ts
|
|
1165
|
-
import { existsSync as
|
|
1166
|
-
import
|
|
2237
|
+
import { existsSync as existsSync6 } from "fs";
|
|
2238
|
+
import path17 from "path";
|
|
1167
2239
|
var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
|
|
1168
2240
|
var PAGE_SELECTION_TOOL = {
|
|
1169
2241
|
name: "select_pages",
|
|
@@ -1208,12 +2280,35 @@ ${indexContent}`;
|
|
|
1208
2280
|
return { pages: [], reasoning: "Failed to parse page selection response" };
|
|
1209
2281
|
}
|
|
1210
2282
|
}
|
|
2283
|
+
/**
 * Render page entries as a bullet list, one line per entry.
 *
 * @param {Array<{slug: string, title: string, summary: string}>} candidates - Entries to list.
 * @returns {string} Newline-joined lines; "" for an empty list.
 */
function buildFilteredIndex(candidates) {
  const formatLine = ({ slug, title, summary }) => `- **${slug}**: ${title} \u2014 ${summary}`;
  const lines = candidates.map((entry) => formatLine(entry));
  return lines.join("\n");
}
|
|
2286
|
+
/**
 * Choose the pages to answer a question from.
 *
 * When the pre-filter yields entries, page selection runs against an index
 * built from just those entries and the selected names are returned as-is.
 * Otherwise selection runs against the full index file and the selected names
 * are slugified.
 *
 * @param {string} root - Project root directory.
 * @param {string} question - The user's question.
 * @returns {Promise<{pages: string[], rawPages: string[], reasoning: string}>}
 */
async function selectRelevantPages(root, question) {
  const prefiltered = await tryFindRelevantPages(root, question);
  const hasPrefilter = prefiltered.length > 0;
  if (!hasPrefilter) {
    const indexContent = await safeReadFile(path17.join(root, INDEX_FILE));
    const selection = await selectPages(question, indexContent);
    return {
      pages: selection.pages.map((p) => slugify(p)),
      rawPages: selection.pages,
      reasoning: selection.reasoning
    };
  }
  const narrowed = await selectPages(question, buildFilteredIndex(prefiltered));
  return { pages: narrowed.pages, rawPages: narrowed.pages, reasoning: narrowed.reasoning };
}
|
|
2297
|
+
/**
 * Run findRelevantPages, degrading to an empty list (with a printed notice)
 * if it fails, so callers can fall back to the full index.
 *
 * @param {string} root - Project root directory.
 * @param {string} question - The user's question.
 * @returns {Promise<Array>} The pre-filtered pages, or [] on failure.
 */
async function tryFindRelevantPages(root, question) {
  let relevant;
  try {
    relevant = await findRelevantPages(root, question);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    status("!", dim(`Semantic pre-filter unavailable (${reason}); using full index.`));
    return [];
  }
  return relevant;
}
|
|
1211
2306
|
async function loadSelectedPages(root, slugs) {
|
|
1212
2307
|
const sections = [];
|
|
1213
2308
|
for (const slug of slugs) {
|
|
1214
2309
|
let content = "";
|
|
1215
2310
|
for (const dir of PAGE_DIRS) {
|
|
1216
|
-
const candidate = await safeReadFile(
|
|
2311
|
+
const candidate = await safeReadFile(path17.join(root, dir, `${slug}.md`));
|
|
1217
2312
|
if (!candidate) continue;
|
|
1218
2313
|
const { meta } = parseFrontmatter(candidate);
|
|
1219
2314
|
if (meta.orphaned) continue;
|
|
@@ -1229,20 +2324,18 @@ ${content}`);
|
|
|
1229
2324
|
}
|
|
1230
2325
|
return sections.join("\n\n");
|
|
1231
2326
|
}
|
|
1232
|
-
|
|
1233
|
-
|
|
2327
|
+
var ANSWER_SYSTEM_PROMPT = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
|
|
2328
|
+
async function callAnswerLLM(question, pagesContent, onToken) {
|
|
1234
2329
|
const userMessage = `Question: ${question}
|
|
1235
2330
|
|
|
1236
2331
|
Relevant wiki pages:
|
|
1237
2332
|
${pagesContent}`;
|
|
1238
|
-
|
|
1239
|
-
system:
|
|
2333
|
+
return callClaude({
|
|
2334
|
+
system: ANSWER_SYSTEM_PROMPT,
|
|
1240
2335
|
messages: [{ role: "user", content: userMessage }],
|
|
1241
|
-
stream:
|
|
1242
|
-
onToken
|
|
2336
|
+
stream: Boolean(onToken),
|
|
2337
|
+
onToken
|
|
1243
2338
|
});
|
|
1244
|
-
process.stdout.write("\n");
|
|
1245
|
-
return answer;
|
|
1246
2339
|
}
|
|
1247
2340
|
function summarizeAnswer(answer) {
|
|
1248
2341
|
const firstLine = answer.trim().split(/\n/)[0] ?? "";
|
|
@@ -1251,7 +2344,7 @@ function summarizeAnswer(answer) {
|
|
|
1251
2344
|
}
|
|
1252
2345
|
async function saveQueryPage(root, question, answer) {
|
|
1253
2346
|
const slug = slugify(question);
|
|
1254
|
-
const filePath =
|
|
2347
|
+
const filePath = path17.join(root, QUERIES_DIR, `${slug}.md`);
|
|
1255
2348
|
const frontmatter = buildFrontmatter({
|
|
1256
2349
|
title: question,
|
|
1257
2350
|
summary: summarizeAnswer(answer),
|
|
@@ -1268,27 +2361,52 @@ ${answer}
|
|
|
1268
2361
|
success(`Saved query \u2192 ${source(filePath)}`)
|
|
1269
2362
|
);
|
|
1270
2363
|
await generateIndex(root);
|
|
2364
|
+
try {
|
|
2365
|
+
await updateEmbeddings(root, [slug]);
|
|
2366
|
+
} catch (err) {
|
|
2367
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
2368
|
+
status("!", warn(`Skipped embeddings update: ${message}`));
|
|
2369
|
+
}
|
|
2370
|
+
return slug;
|
|
2371
|
+
}
|
|
2372
|
+
/**
 * Answer a question from wiki content: select pages, load them, ask the
 * model, and optionally save the answer as a query page.
 *
 * @param {string} root - Project root directory.
 * @param {string} question - The user's question.
 * @param {object} [options={}] - Optional `save` flag plus `onToken` and
 *   `onPageSelection` callbacks.
 * @returns {Promise<{answer: string, selectedPages: string[], reasoning: string, saved?: string}>}
 *   `answer` is "" (and `saved` is absent) when no page content could be loaded.
 * @throws {Error} When the wiki index file does not exist.
 */
async function generateAnswer(root, question, options = {}) {
  const indexPath = path17.join(root, INDEX_FILE);
  if (!existsSync6(indexPath)) {
    throw new Error("Wiki index not found. Run `llmwiki compile` first.");
  }
  const selection = await selectRelevantPages(root, question);
  const { pages: selectedPages, reasoning } = selection;
  options.onPageSelection?.(selectedPages, reasoning);
  const pagesContent = await loadSelectedPages(root, selectedPages);
  if (!pagesContent) {
    return { answer: "", selectedPages, reasoning };
  }
  const answer = await callAnswerLLM(question, pagesContent, options.onToken);
  const saved = options.save ? await saveQueryPage(root, question, answer) : void 0;
  return { answer, selectedPages, reasoning, saved };
}
|
|
1272
2389
|
async function queryCommand(root, question, options) {
|
|
1273
|
-
if (!
|
|
2390
|
+
if (!existsSync6(path17.join(root, INDEX_FILE))) {
|
|
1274
2391
|
status("!", error("Wiki index not found. Run `llmwiki compile` first."));
|
|
1275
2392
|
return;
|
|
1276
2393
|
}
|
|
1277
2394
|
header("Selecting relevant pages");
|
|
1278
|
-
const
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
2395
|
+
const result = await generateAnswer(root, question, {
|
|
2396
|
+
save: options.save,
|
|
2397
|
+
onToken: (text) => process.stdout.write(text),
|
|
2398
|
+
onPageSelection: (pages, reasoning) => {
|
|
2399
|
+
status("i", dim(`Reasoning: ${reasoning}`));
|
|
2400
|
+
status("*", info(`Selected ${pages.length} page(s): ${pages.join(", ")}`));
|
|
2401
|
+
header("Generating answer");
|
|
2402
|
+
}
|
|
2403
|
+
});
|
|
2404
|
+
process.stdout.write("\n");
|
|
2405
|
+
if (!result.answer) {
|
|
1286
2406
|
status("!", error("No matching pages found. Try refining your question."));
|
|
1287
2407
|
return;
|
|
1288
2408
|
}
|
|
1289
|
-
|
|
1290
|
-
if (options.save) {
|
|
1291
|
-
await saveQueryPage(root, question, answer);
|
|
2409
|
+
if (result.saved) {
|
|
1292
2410
|
status("\u2192", dim("Saved. Future queries will use this answer as context."));
|
|
1293
2411
|
} else {
|
|
1294
2412
|
status("\u2192", dim("Tip: use --save to add this answer to your wiki"));
|
|
@@ -1297,12 +2415,12 @@ async function queryCommand(root, question, options) {
|
|
|
1297
2415
|
|
|
1298
2416
|
// src/commands/watch.ts
|
|
1299
2417
|
import { watch as chokidarWatch } from "chokidar";
|
|
1300
|
-
import { existsSync as
|
|
1301
|
-
import
|
|
2418
|
+
import { existsSync as existsSync7 } from "fs";
|
|
2419
|
+
import path18 from "path";
|
|
1302
2420
|
var DEBOUNCE_MS = 500;
|
|
1303
2421
|
async function watchCommand() {
|
|
1304
|
-
const sourcesPath =
|
|
1305
|
-
if (!
|
|
2422
|
+
const sourcesPath = path18.resolve(SOURCES_DIR);
|
|
2423
|
+
if (!existsSync7(sourcesPath)) {
|
|
1306
2424
|
status(
|
|
1307
2425
|
"!",
|
|
1308
2426
|
warn("No sources/ directory found. Run `llmwiki ingest <url>` first.")
|
|
@@ -1336,7 +2454,7 @@ async function watchCommand() {
|
|
|
1336
2454
|
const scheduleCompile = (eventPath, event) => {
|
|
1337
2455
|
status(
|
|
1338
2456
|
"~",
|
|
1339
|
-
dim(`${event}: ${
|
|
2457
|
+
dim(`${event}: ${path18.basename(eventPath)}`)
|
|
1340
2458
|
);
|
|
1341
2459
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
1342
2460
|
debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
|
|
@@ -1350,6 +2468,807 @@ async function watchCommand() {
|
|
|
1350
2468
|
});
|
|
1351
2469
|
}
|
|
1352
2470
|
|
|
2471
|
+
// src/linter/rules.ts
|
|
2472
|
+
import { readdir as readdir8, readFile as readFile9 } from "fs/promises";
|
|
2473
|
+
import { existsSync as existsSync8 } from "fs";
|
|
2474
|
+
import path19 from "path";
|
|
2475
|
+
var MIN_BODY_LENGTH = 50;
|
|
2476
|
+
var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
|
|
2477
|
+
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
2478
|
+
/**
 * Scan `content` one line at a time and collect the first capture group of
 * every `pattern` match together with the 1-based line it occurred on.
 *
 * @param {string} content - Text to scan.
 * @param {RegExp} pattern - Global regex (required by `matchAll`) with a capture group.
 * @returns {{captured: string, line: number}[]} Matches in document order.
 */
function findMatchesInContent(content, pattern) {
  return content.split("\n").flatMap((text, index) =>
    Array.from(text.matchAll(pattern), (hit) => ({ captured: hit[1], line: index + 1 }))
  );
}
|
|
2489
|
+
/**
 * Read every `.md` file directly inside `dirPath`.
 *
 * @param {string} dirPath - Directory to read.
 * @returns {Promise<{filePath: string, content: string}[]>} One record per
 *   markdown file; an empty array when the directory does not exist.
 */
async function readMarkdownFiles(dirPath) {
  if (!existsSync8(dirPath)) {
    return [];
  }
  const names = await readdir8(dirPath);
  const pending = [];
  for (const name of names) {
    if (!name.endsWith(".md")) continue;
    const filePath = path19.join(dirPath, name);
    // Start every read immediately; they are awaited together below.
    pending.push(readFile9(filePath, "utf-8").then((content) => ({ filePath, content })));
  }
  return Promise.all(pending);
}
|
|
2502
|
+
/**
 * Load all markdown pages under the concepts directory followed by those
 * under the queries directory.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<{filePath: string, content: string}[]>} Concept pages first, then query pages.
 */
async function collectAllPages(root) {
  const pages = [];
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
    pages.push(...(await readMarkdownFiles(path19.join(root, dir))));
  }
  return pages;
}
|
|
2507
|
+
/**
 * Build the set of lower-cased page slugs, where a slug is the file name
 * without its `.md` extension.
 *
 * @param {{filePath: string}[]} pages - Page records.
 * @returns {Set<string>} Lower-cased slugs.
 */
function buildPageSlugSet(pages) {
  return new Set(
    pages.map(({ filePath }) => path19.basename(filePath, ".md").toLowerCase())
  );
}
|
|
2515
|
+
/**
 * Lint rule: report every `[[wikilink]]` whose slugified target does not
 * match the slug of any existing page.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object[]>} One "broken-wikilink" error per unresolved link.
 */
async function checkBrokenWikilinks(root) {
  const pages = await collectAllPages(root);
  const knownSlugs = buildPageSlugSet(pages);
  return pages.flatMap((page) =>
    findMatchesInContent(page.content, WIKILINK_PATTERN)
      .filter(({ captured }) => !knownSlugs.has(slugify(captured)))
      .map(({ captured, line }) => ({
        rule: "broken-wikilink",
        severity: "error",
        file: page.filePath,
        message: `Broken wikilink [[${captured}]] \u2014 no matching page found`,
        line
      }))
  );
}
|
|
2535
|
+
/**
 * Lint rule: report pages whose frontmatter has `orphaned` set to exactly `true`.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object[]>} One "orphaned-page" warning per flagged page.
 */
async function checkOrphanedPages(root) {
  const pages = await collectAllPages(root);
  return pages
    .filter((page) => parseFrontmatter(page.content).meta.orphaned === true)
    .map((page) => ({
      rule: "orphaned-page",
      severity: "warning",
      file: page.filePath,
      message: `Page is marked as orphaned`
    }));
}
|
|
2551
|
+
/**
 * Lint rule: report pages whose frontmatter `summary` is falsy or a
 * whitespace-only string.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object[]>} One "missing-summary" warning per affected page.
 */
async function checkMissingSummaries(root) {
  const lacksSummary = (page) => {
    const { summary } = parseFrontmatter(page.content).meta;
    if (!summary) return true;
    return typeof summary === "string" && summary.trim() === "";
  };
  const pages = await collectAllPages(root);
  return pages.filter(lacksSummary).map((page) => ({
    rule: "missing-summary",
    severity: "warning",
    file: page.filePath,
    message: `Page has no summary in frontmatter`
  }));
}
|
|
2569
|
+
/**
 * Lint rule: report pages that share a title, compared case-insensitively
 * after trimming. Pages without a non-empty string title are ignored.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object[]>} One "duplicate-concept" error per file in each
 *   group of two or more pages with the same normalized title.
 */
async function checkDuplicateConcepts(root) {
  const pages = await collectAllPages(root);
  const filesByTitle = /* @__PURE__ */ new Map();
  for (const { content, filePath } of pages) {
    const { title } = parseFrontmatter(content).meta;
    if (typeof title !== "string" || title === "") continue;
    const key = title.toLowerCase().trim();
    if (filesByTitle.has(key)) {
      filesByTitle.get(key).push(filePath);
    } else {
      filesByTitle.set(key, [filePath]);
    }
  }
  const results = [];
  for (const [title, files] of filesByTitle) {
    if (files.length < 2) continue;
    files.forEach((file) => {
      results.push({
        rule: "duplicate-concept",
        severity: "error",
        file,
        message: `Duplicate title "${title}" \u2014 also in ${files.filter((f) => f !== file).join(", ")}`
      });
    });
  }
  return results;
}
|
|
2595
|
+
/**
 * Lint rule: report titled pages whose trimmed body is shorter than
 * MIN_BODY_LENGTH characters. Pages without a non-blank string title are skipped.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object[]>} One "empty-page" warning per affected page.
 */
async function checkEmptyPages(root) {
  const pages = await collectAllPages(root);
  const isTitledButEmpty = (page) => {
    const { meta, body } = parseFrontmatter(page.content);
    const titled = typeof meta.title === "string" && meta.title.trim() !== "";
    const tooShort = body.trim().length < MIN_BODY_LENGTH;
    return titled && tooShort;
  };
  return pages.filter(isTitledButEmpty).map((page) => ({
    rule: "empty-page",
    severity: "warning",
    file: page.filePath,
    message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
  }));
}
|
|
2613
|
+
/**
 * Lint rule: report pages whose provenance confidence is defined and below
 * LOW_CONFIDENCE_THRESHOLD.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object[]>} One "low-confidence" warning per affected page.
 */
async function checkLowConfidencePages(root) {
  const pages = await collectAllPages(root);
  return pages
    .map((page) => ({
      file: page.filePath,
      confidence: parseProvenanceMetadata(parseFrontmatter(page.content).meta).confidence
    }))
    // Written as a negated >= so the comparison matches the original exactly.
    .filter(({ confidence }) => confidence !== void 0 && !(confidence >= LOW_CONFIDENCE_THRESHOLD))
    .map(({ file, confidence }) => ({
      rule: "low-confidence",
      severity: "warning",
      file,
      message: `Page confidence ${confidence.toFixed(2)} is below ${LOW_CONFIDENCE_THRESHOLD}`
    }));
}
|
|
2629
|
+
/**
 * Lint rule: report pages whose provenance metadata carries a non-empty
 * `contradictedBy` list, naming the slugs in that list.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object[]>} One "contradicted-page" warning per affected page.
 */
async function checkContradictedPages(root) {
  const pages = await collectAllPages(root);
  return pages.flatMap((page) => {
    const { contradictedBy } = parseProvenanceMetadata(parseFrontmatter(page.content).meta);
    if (!contradictedBy || contradictedBy.length === 0) return [];
    const slugs = contradictedBy.map((r) => r.slug).join(", ");
    return [
      {
        rule: "contradicted-page",
        severity: "warning",
        file: page.filePath,
        message: `Page contradicts: ${slugs}`
      }
    ];
  });
}
|
|
2646
|
+
/**
 * Lint rule: report pages with more uncited inferred paragraphs than
 * MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS. The count comes from the
 * provenance `inferredParagraphs` value when present, otherwise from a scan
 * of the page body.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object[]>} One "excess-inferred-paragraphs" warning per affected page.
 */
async function checkInferredWithoutCitations(root) {
  const pages = await collectAllPages(root);
  return pages.flatMap((page) => {
    const { meta, body } = parseFrontmatter(page.content);
    const { inferredParagraphs } = parseProvenanceMetadata(meta);
    const inferred = inferredParagraphs ?? countUncitedProseParagraphs(body);
    if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) return [];
    return [
      {
        rule: "excess-inferred-paragraphs",
        severity: "warning",
        file: page.filePath,
        message: `Page has ${inferred} inferred paragraphs without citations (max ${MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS})`
      }
    ];
  });
}
|
|
2663
|
+
// A block counts as prose only when it begins with an ASCII letter.
var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
/**
 * Count blank-line-separated paragraphs that start with an ASCII letter and
 * contain no `^[...]` citation.
 *
 * @param {string} body - Page body.
 * @returns {number} Number of uncited prose paragraphs.
 */
function countUncitedProseParagraphs(body) {
  let uncited = 0;
  for (const rawBlock of body.split(/\n\s*\n/)) {
    const paragraph = rawBlock.trim();
    if (paragraph === "" || !PROSE_PARAGRAPH_LEAD.test(paragraph)) continue;
    const hasCitation = CITATION_PATTERN.test(paragraph);
    // CITATION_PATTERN is a shared /g regex: rewind it so the next test()
    // starts from the beginning of its input.
    CITATION_PATTERN.lastIndex = 0;
    if (!hasCitation) uncited += 1;
  }
  return uncited;
}
|
|
2680
|
+
/**
 * Split the inside of a citation marker on commas into trimmed, non-empty
 * filenames.
 *
 * @param {string} captured - Text captured between `^[` and `]`.
 * @returns {string[]} Filenames in their original order.
 */
function splitCitationFilenames(captured) {
  const names = [];
  for (const piece of captured.split(",")) {
    const name = piece.trim();
    if (name.length > 0) names.push(name);
  }
  return names;
}
|
|
2683
|
+
/**
 * Lint rule: report every cited filename that does not exist under the
 * sources directory.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object[]>} One "broken-citation" error per missing file,
 *   carrying the line of the citation marker.
 */
async function checkBrokenCitations(root) {
  const pages = await collectAllPages(root);
  const sourcesDir = path19.join(root, SOURCES_DIR);
  const results = [];
  for (const { content, filePath } of pages) {
    for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
      const missing = splitCitationFilenames(captured).filter(
        (filename) => !existsSync8(path19.join(sourcesDir, filename))
      );
      for (const filename of missing) {
        results.push({
          rule: "broken-citation",
          severity: "error",
          file: filePath,
          message: `Broken citation ^[${filename}] \u2014 source file not found`,
          line
        });
      }
    }
  }
  return results;
}
|
|
2705
|
+
|
|
2706
|
+
// src/linter/index.ts
|
|
2707
|
+
var ALL_RULES = [
|
|
2708
|
+
checkBrokenWikilinks,
|
|
2709
|
+
checkOrphanedPages,
|
|
2710
|
+
checkMissingSummaries,
|
|
2711
|
+
checkDuplicateConcepts,
|
|
2712
|
+
checkEmptyPages,
|
|
2713
|
+
checkBrokenCitations,
|
|
2714
|
+
checkLowConfidencePages,
|
|
2715
|
+
checkContradictedPages,
|
|
2716
|
+
checkInferredWithoutCitations
|
|
2717
|
+
];
|
|
2718
|
+
/**
 * Count the lint results that carry the given severity.
 *
 * @param {{severity: string}[]} results - Lint diagnostics.
 * @param {string} severity - Severity to count.
 * @returns {number} Number of matching results.
 */
function countBySeverity(results, severity) {
  return results.reduce(
    (total, entry) => (entry.severity === severity ? total + 1 : total),
    0
  );
}
|
|
2721
|
+
/**
 * Run every rule in ALL_RULES against `root` concurrently and merge their
 * diagnostics in rule order.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<{errors: number, warnings: number, info: number, results: object[]}>}
 *   Per-severity counts plus the flattened diagnostics.
 */
async function lint(root) {
  const pending = [];
  for (const rule of ALL_RULES) {
    pending.push(rule(root));
  }
  const results = (await Promise.all(pending)).flat();
  const errors = countBySeverity(results, "error");
  const warnings = countBySeverity(results, "warning");
  const info = countBySeverity(results, "info");
  return { errors, warnings, info, results };
}
|
|
2733
|
+
|
|
2734
|
+
// src/commands/lint.ts
|
|
2735
|
+
// Output formatter applied to the severity label, keyed by severity.
var SEVERITY_FORMATTERS = {
  error,
  warning: warn,
  info
};
// Status icon shown in front of each diagnostic, keyed by severity.
var SEVERITY_ICONS = {
  error: "x",
  warning: "!",
  info: "i"
};
/**
 * Print one lint diagnostic as `<severity> <file[:line]> <message>`.
 *
 * @param {{severity: string, file: string, line?: number, message: string}} result
 */
function printResult(result) {
  const { severity, file, line, message } = result;
  const label = SEVERITY_FORMATTERS[severity](severity);
  // The line suffix is added only when a truthy line number is present.
  const location = line ? `${file}:${line}` : file;
  status(SEVERITY_ICONS[severity], `${label} ${dim(location)} ${message}`);
}
|
|
2751
|
+
/**
 * CLI entry point for the lint command: lint the current working directory,
 * print each diagnostic followed by a one-line summary, and signal failure
 * through the process exit code when at least one error was found.
 *
 * @returns {Promise<void>}
 */
async function lintCommand() {
  header("Linting wiki");
  const summary = await lint(process.cwd());
  for (const result of summary.results) {
    printResult(result);
  }
  console.log();
  const summaryLine = [
    error(`${summary.errors} error(s)`),
    warn(`${summary.warnings} warning(s)`),
    info(`${summary.info} info`)
  ].join(", ");
  status("*", summaryLine);
  if (summary.errors > 0) {
    // Set the exit code rather than calling process.exit(): a forced exit can
    // drop diagnostics still queued on stdout (e.g. when piped), and the
    // review commands in this file already report failure via exitCode.
    process.exitCode = 1;
  }
}
|
|
2768
|
+
|
|
2769
|
+
// src/commands/review-list.ts
|
|
2770
|
+
/**
 * CLI entry point: list the pending review candidates found for the current
 * working directory, or report that there are none.
 *
 * @returns {Promise<void>}
 */
async function reviewListCommand() {
  header("Pending review candidates");
  const candidates = await listCandidates(process.cwd());
  if (candidates.length === 0) {
    status("\u2713", success("No pending candidates."));
    return;
  }
  candidates.forEach((candidate) => {
    const meta = dim(`${candidate.generatedAt} | sources: ${candidate.sources.join(", ")}`);
    status("?", `${info(candidate.id)} \u2192 ${candidate.slug} ${meta}`);
  });
  status("\u2192", dim(`Use \`llmwiki review show <id>\` to inspect a candidate.`));
}
|
|
2787
|
+
|
|
2788
|
+
// src/commands/review-show.ts
|
|
2789
|
+
/**
 * CLI entry point: print the metadata and body of one review candidate.
 * Returns without printing when `loadCandidateOrFail` yields nothing.
 *
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function reviewShowCommand(id) {
  const candidate = await loadCandidateOrFail(process.cwd(), id);
  if (!candidate) return;
  const showField = (text) => status("i", dim(text));
  header(`Candidate ${candidate.id}`);
  showField(`title: ${candidate.title}`);
  showField(`slug: ${candidate.slug}`);
  showField(`summary: ${candidate.summary}`);
  showField(`sources: ${candidate.sources.join(", ")}`);
  showField(`generated: ${candidate.generatedAt}`);
  console.log();
  console.log(candidate.body);
}
|
|
2801
|
+
|
|
2802
|
+
// src/commands/review-approve.ts
|
|
2803
|
+
import path20 from "path";
|
|
2804
|
+
|
|
2805
|
+
// src/commands/review-helpers.ts
|
|
2806
|
+
/**
 * Run a review action for candidate `id` while holding the workspace lock.
 *
 * The candidate is loaded once before locking; when that yields nothing the
 * function returns without taking the lock. If the lock cannot be acquired an
 * error is printed and the process exit code is set to 1. The lock is always
 * released after `underLock` settles.
 *
 * @param {string} id - Candidate identifier.
 * @param {(root: string, id: string) => Promise<void>} underLock - Action to run while locked.
 * @returns {Promise<void>}
 */
async function runReviewUnderLock(id, underLock) {
  const root = process.cwd();
  if (!(await loadCandidateOrFail(root, id))) return;
  if (await acquireLock(root)) {
    try {
      await underLock(root, id);
    } finally {
      await releaseLock(root);
    }
    return;
  }
  status("!", error("Could not acquire lock. Try again later."));
  process.exitCode = 1;
}
|
|
2822
|
+
|
|
2823
|
+
// src/commands/review-approve.ts
|
|
2824
|
+
/**
 * CLI entry point: approve review candidate `id` under the workspace lock.
 *
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function reviewApproveCommand(id) {
  await runReviewUnderLock(id, approveUnderLock);
}
/**
 * Approve a candidate: validate its body, write it as a concept page, persist
 * its source states, refresh the derived wiki artefacts and delete the
 * candidate. A body that fails validation sets the exit code to 1 and writes
 * nothing.
 *
 * @param {string} root - Workspace root.
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function approveUnderLock(root, id) {
  const candidate = await loadCandidateUnderLockOrFail(root, id);
  if (!candidate) return;
  const isValid = validateWikiPage(candidate.body);
  if (isValid) {
    const pagePath = path20.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
    await atomicWrite(pagePath, candidate.body);
    status("+", success(`Approved \u2192 ${source(pagePath)}`));
    await persistCandidateSourceStates(root, candidate);
    await refreshWikiAfterApproval(root, candidate.slug);
    await deleteCandidate(root, id);
    status("\u2713", dim(`Candidate ${id} cleared.`));
    return;
  }
  status("!", error(`Candidate ${id} failed page validation; not approved.`));
  process.exitCode = 1;
}
|
|
2843
|
+
/**
 * Write the approved candidate's per-source state entries, skipping any
 * source file that another pending candidate also lists.
 *
 * @param {string} root - Workspace root.
 * @param {{id: string, sourceStates?: object}} candidate - Candidate being approved.
 * @returns {Promise<void>}
 */
async function persistCandidateSourceStates(root, candidate) {
  const { sourceStates } = candidate;
  if (!sourceStates) return;
  const claimedElsewhere = await collectOtherCandidateSources(root, candidate.id);
  const toPersist = Object.entries(sourceStates).filter(
    ([sourceFile]) => !claimedElsewhere.has(sourceFile)
  );
  // Updates are applied one at a time, in entry order.
  for (const [sourceFile, entry] of toPersist) {
    await updateSourceState(root, sourceFile, entry);
  }
}
|
|
2852
|
+
/**
 * Collect the source files listed by every pending candidate except the one
 * being approved.
 *
 * @param {string} root - Workspace root.
 * @param {string} approvingId - Id of the candidate to leave out.
 * @returns {Promise<Set<string>>} Source files referenced by the other candidates.
 */
async function collectOtherCandidateSources(root, approvingId) {
  const pending = await listCandidates(root);
  return new Set(
    pending
      .filter((candidate) => candidate.id !== approvingId)
      .flatMap((candidate) => [...candidate.sources])
  );
}
|
|
2861
|
+
/**
 * After an approval, re-run in order: link resolution for the approved slug,
 * index generation, MOC generation, and a best-effort embeddings update.
 *
 * @param {string} root - Workspace root.
 * @param {string} slug - Slug of the newly approved page.
 * @returns {Promise<void>}
 */
async function refreshWikiAfterApproval(root, slug) {
  const steps = [
    () => resolveLinks(root, [slug], [slug]),
    () => generateIndex(root),
    () => generateMOC(root),
    () => safelyUpdateEmbeddings2(root, [slug])
  ];
  for (const step of steps) {
    await step();
  }
}
/**
 * Update embeddings for `slugs`; a failure is reported as a warning instead
 * of being propagated to the caller.
 *
 * @param {string} root - Workspace root.
 * @param {string[]} slugs - Slugs whose embeddings should be refreshed.
 * @returns {Promise<void>}
 */
async function safelyUpdateEmbeddings2(root, slugs) {
  try {
    await updateEmbeddings(root, slugs);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    status("!", warn(`Skipped embeddings update: ${reason}`));
  }
}
|
|
2875
|
+
|
|
2876
|
+
// src/commands/review-reject.ts
|
|
2877
|
+
/**
 * CLI entry point: reject review candidate `id` under the workspace lock.
 *
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function reviewRejectCommand(id) {
  await runReviewUnderLock(id, rejectUnderLock);
}
/**
 * Reject a candidate by archiving it, then print a confirmation.
 *
 * @param {string} root - Workspace root.
 * @param {string} id - Candidate identifier.
 * @returns {Promise<void>}
 */
async function rejectUnderLock(root, id) {
  const candidate = await loadCandidateUnderLockOrFail(root, id);
  if (candidate) {
    await archiveCandidate(root, id);
    const notice = warn(`Rejected candidate ${id} (${candidate.slug}) \u2014 archived, wiki unchanged.`);
    status("-", notice);
  }
}
|
|
2889
|
+
|
|
2890
|
+
// src/mcp/server.ts
|
|
2891
|
+
import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2892
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
2893
|
+
|
|
2894
|
+
// src/mcp/tools.ts
|
|
2895
|
+
import path21 from "path";
|
|
2896
|
+
import { z } from "zod";
|
|
2897
|
+
|
|
2898
|
+
// src/mcp/provider-check.ts
|
|
2899
|
+
// Environment variable holding each supported provider's API key.
// `null` marks a provider that needs no key.
var PROVIDER_KEY_VARS = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  ollama: null,
  minimax: "MINIMAX_API_KEY"
};
/**
 * Verify that the configured LLM provider is known and has credentials.
 *
 * The provider comes from LLMWIKI_PROVIDER, falling back to DEFAULT_PROVIDER.
 * "anthropic" passes when `resolveAnthropicAuthFromEnv()` yields an API key
 * or an auth token; other providers need their PROVIDER_KEY_VARS variable to
 * be set, unless that entry is `null`.
 *
 * @throws {Error} If the provider is not in PROVIDER_KEY_VARS or its
 *   credentials are missing.
 */
function ensureProviderAvailable() {
  const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
  if (provider === "anthropic") {
    const auth = resolveAnthropicAuthFromEnv();
    if (!auth.apiKey && !auth.authToken) {
      throw new Error(
        'Anthropic credentials are required for the "anthropic" provider. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN.'
      );
    }
    return;
  }
  // Own-property check: a plain lookup also resolves inherited names such as
  // "constructor" or "toString", which would then be misreported as a
  // missing API key instead of an unknown provider.
  if (!Object.prototype.hasOwnProperty.call(PROVIDER_KEY_VARS, provider)) {
    throw new Error(
      `Unknown provider "${provider}". Supported: ${Object.keys(PROVIDER_KEY_VARS).join(", ")}`
    );
  }
  const keyVar = PROVIDER_KEY_VARS[provider];
  if (keyVar && !process.env[keyVar]) {
    throw new Error(
      `${keyVar} environment variable is required for the "${provider}" provider.`
    );
  }
}
|
|
2928
|
+
|
|
2929
|
+
// src/mcp/tools.ts
|
|
2930
|
+
var PAGE_DIRS2 = [CONCEPTS_DIR, QUERIES_DIR];
|
|
2931
|
+
/**
 * Wrap a payload as a tool result: pretty-printed JSON as text content plus
 * the raw payload under `structuredContent.result`.
 *
 * @param {*} payload - JSON-serialisable value.
 * @returns {{content: {type: string, text: string}[], structuredContent: {result: *}}}
 */
function jsonResult(payload) {
  const text = JSON.stringify(payload, null, 2);
  const textBlock = { type: "text", text };
  return { content: [textBlock], structuredContent: { result: payload } };
}
|
|
2937
|
+
/**
 * Register all wiki tools on `server`, in a fixed order.
 *
 * @param {object} server - Server instance exposing `registerTool`.
 * @param {string} root - Workspace root passed to every tool.
 */
function registerWikiTools(server, root) {
  const registrars = [
    registerIngestTool,
    registerCompileTool,
    registerQueryTool,
    registerSearchTool,
    registerReadTool,
    registerLintTool,
    registerStatusTool
  ];
  for (const register of registrars) {
    register(server, root);
  }
}
|
|
2946
|
+
/**
 * Register the `ingest_source` tool, which runs `ingestSource` with the
 * process working directory temporarily switched to `root`.
 *
 * @param {object} server - Server instance exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerIngestTool(server, root) {
  const config = {
    title: "Ingest Source",
    description: "Fetch a URL or copy a local file into sources/. Returns the saved filename, character count, and whether content was truncated to fit the size limit.",
    inputSchema: {
      source: z.string().describe("URL (http/https) or absolute path to a .md/.txt file")
    }
  };
  const handler = async ({ source: source2 }) => {
    const previousCwd = process.cwd();
    try {
      // NOTE(review): chdir is process-wide, so anything else running during
      // the await also sees `root` as the working directory.
      process.chdir(root);
      return jsonResult(await ingestSource(source2));
    } finally {
      process.chdir(previousCwd);
    }
  };
  server.registerTool("ingest_source", config, handler);
}
|
|
2968
|
+
/**
 * Register the `compile_wiki` tool. Its handler checks provider credentials
 * and then returns the result of `compileAndReport(root)`.
 *
 * @param {object} server - Server instance exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerCompileTool(server, root) {
  const config = {
    title: "Compile Wiki",
    description: "Run the incremental compile pipeline: extract concepts from new/changed sources, generate wiki pages, resolve interlinks, and rebuild the index. Requires an LLM provider with credentials.",
    inputSchema: {}
  };
  const handler = async () => {
    ensureProviderAvailable();
    return jsonResult(await compileAndReport(root));
  };
  server.registerTool("compile_wiki", config, handler);
}
|
|
2983
|
+
/**
 * Register the `query_wiki` tool. Its handler checks provider credentials
 * and then returns the result of `generateAnswer`, forwarding `save`.
 *
 * @param {object} server - Server instance exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerQueryTool(server, root) {
  const config = {
    title: "Query Wiki",
    description: "Ask a natural-language question. Selects relevant pages with the LLM, loads them, and returns a grounded answer with citations. Set save=true to persist the answer as a wiki page. Requires an LLM provider.",
    inputSchema: {
      question: z.string().describe("The natural-language question to answer."),
      save: z.boolean().optional().describe("Persist the answer as a wiki/queries/ page when true.")
    }
  };
  const handler = async ({ question, save }) => {
    ensureProviderAvailable();
    return jsonResult(await generateAnswer(root, question, { save }));
  };
  server.registerTool("query_wiki", config, handler);
}
|
|
3001
|
+
/**
 * Register the `search_pages` tool. Its handler checks provider credentials,
 * picks slugs for the question and returns the loaded page records.
 *
 * @param {object} server - Server instance exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerSearchTool(server, root) {
  const config = {
    title: "Search Pages",
    description: "Select pages relevant to a question and return their full content. Uses semantic embeddings when available, falling back to LLM-based selection over the wiki index. Requires an LLM provider.",
    inputSchema: {
      question: z.string().describe("The query used to rank pages.")
    }
  };
  const handler = async ({ question }) => {
    ensureProviderAvailable();
    const slugs = await pickSearchSlugs(root, question);
    const pages = await loadPageRecords(root, slugs);
    return jsonResult({ pages });
  };
  server.registerTool("search_pages", config, handler);
}
|
|
3019
|
+
/**
 * Choose page slugs for a question. `findRelevantPages` is tried first; when
 * it throws or returns nothing, selection falls back to `selectPages` over
 * the index file content.
 *
 * @param {string} root - Workspace root.
 * @param {string} question - Query text.
 * @returns {Promise<string[]>} Selected slugs.
 */
async function pickSearchSlugs(root, question) {
  let slugs = null;
  try {
    const hits = await findRelevantPages(root, question);
    if (hits.length > 0) slugs = hits.map((hit) => hit.slug);
  } catch {
    // Deliberately ignored: any failure here falls through to index-based selection.
  }
  if (slugs !== null) return slugs;
  const indexContent = await safeReadFile(path21.join(root, INDEX_FILE));
  const selection = await selectPages(question, indexContent);
  return selection.pages;
}
|
|
3029
|
+
/**
 * Register the `read_page` tool. Its handler returns the page found by
 * `readPage` and throws when no page matches the slug.
 *
 * @param {object} server - Server instance exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerReadTool(server, root) {
  const config = {
    title: "Read Page",
    description: "Read a single wiki page by slug. Searches concepts/ first, then queries/. Returns the parsed frontmatter and body. No LLM call required.",
    inputSchema: {
      slug: z.string().describe("Page slug, without .md extension.")
    }
  };
  const handler = async ({ slug }) => {
    const page = await readPage(root, slug);
    if (page) return jsonResult(page);
    throw new Error(`Page not found: ${slug}`);
  };
  server.registerTool("read_page", config, handler);
}
|
|
3048
|
+
/**
 * Register the `lint_wiki` tool, whose handler returns the lint summary for `root`.
 *
 * @param {object} server - Server instance exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerLintTool(server, root) {
  const config = {
    title: "Lint Wiki",
    description: "Run rule-based quality checks (broken wikilinks, orphans, duplicates, empty pages, broken citations). Returns structured diagnostics. No LLM call.",
    inputSchema: {}
  };
  const handler = async () => jsonResult(await lint(root));
  server.registerTool("lint_wiki", config, handler);
}
|
|
3062
|
+
/**
 * Register the `wiki_status` tool, whose handler returns `collectStatus(root)`.
 *
 * @param {object} server - Server instance exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerStatusTool(server, root) {
  const config = {
    title: "Wiki Status",
    description: "Summarize the wiki: page count, source count, last compile time, orphaned pages, and pending source changes. Read-only \u2014 never modifies the workspace.",
    inputSchema: {}
  };
  const handler = async () => {
    const statusReport = await collectStatus(root);
    return jsonResult(statusReport);
  };
  server.registerTool("wiki_status", config, handler);
}
|
|
3073
|
+
/**
 * Gather a summary of the wiki workspace: page counts, tracked source count,
 * most recent compile time, orphaned slugs, pending candidate count and
 * sources whose change status is anything other than "unchanged".
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object>} Status summary; `lastCompiledAt` is `null` when
 *   no source entry carries a string `compiledAt`.
 */
async function collectStatus(root) {
  const concepts = await collectPageSummaries(path21.join(root, CONCEPTS_DIR));
  const queries = await collectPageSummaries(path21.join(root, QUERIES_DIR));
  const state = await readState(root);
  const changes = await detectChanges(root, state);
  const orphans = await findOrphanedSlugs(root);
  const pendingCandidates = await countCandidates(root);
  // Ignore entries without a string timestamp: the default sort() places
  // `undefined` last, so a single incomplete entry used to make the result
  // undefined. Picking the maximum directly also avoids sorting.
  const lastCompile = Object.values(state.sources)
    .map((s) => s.compiledAt)
    .filter((stamp) => typeof stamp === "string")
    .reduce((latest, stamp) => (latest === null || stamp > latest ? stamp : latest), null);
  return {
    pages: { concepts: concepts.length, queries: queries.length, total: concepts.length + queries.length },
    sources: Object.keys(state.sources).length,
    lastCompiledAt: lastCompile,
    orphanedPages: orphans,
    pendingCandidates,
    pendingChanges: changes.filter((c) => c.status !== "unchanged").map((c) => ({ file: c.file, status: c.status }))
  };
}
|
|
3091
|
+
/**
 * List the slugs of concept pages whose frontmatter `orphaned` value is truthy.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<string[]>} Orphaned slugs in scan order.
 */
async function findOrphanedSlugs(root) {
  const pages = await scanWikiPages(path21.join(root, CONCEPTS_DIR));
  const orphaned = [];
  for (const { meta, slug } of pages) {
    if (meta.orphaned) orphaned.push(slug);
  }
  return orphaned;
}
|
|
3095
|
+
/**
 * Load the page record for each slug, dropping slugs that resolve to no page.
 *
 * The lookups are independent reads, so they are started together instead of
 * one after another; the result keeps the order of `slugs`.
 *
 * @param {string} root - Workspace root.
 * @param {string[]} slugs - Slugs to load.
 * @returns {Promise<object[]>} Records for the pages that were found.
 */
async function loadPageRecords(root, slugs) {
  const pages = await Promise.all(slugs.map((slug) => readPage(root, slug)));
  return pages.filter(Boolean);
}
|
|
3103
|
+
/**
 * Read a wiki page by slug, trying each directory in PAGE_DIRS2 in order.
 * A file that is unreadable or empty, or whose frontmatter `orphaned` value
 * is truthy, is skipped.
 *
 * @param {string} root - Workspace root.
 * @param {string} slug - Page slug without the `.md` extension.
 * @returns {Promise<{slug: string, title: string, summary: string, body: string} | null>}
 *   The page record, or `null` when the slug is invalid or no page matches.
 */
async function readPage(root, slug) {
  // Slugs are supplied by tool callers. Refuse anything containing a path
  // separator or NUL so the lookup can never leave the page directories.
  if (typeof slug !== "string" || slug.length === 0 || /[\\/\0]/.test(slug)) {
    return null;
  }
  for (const dir of PAGE_DIRS2) {
    const content = await safeReadFile(path21.join(root, dir, `${slug}.md`));
    if (!content) continue;
    const { meta, body } = parseFrontmatter(content);
    if (meta.orphaned) continue;
    return {
      slug,
      // Fall back to the slug / empty string when frontmatter lacks string values.
      title: typeof meta.title === "string" ? meta.title : slug,
      summary: typeof meta.summary === "string" ? meta.summary : "",
      body: body.trim()
    };
  }
  return null;
}
|
|
3118
|
+
|
|
3119
|
+
// src/mcp/resources.ts
|
|
3120
|
+
import path22 from "path";
|
|
3121
|
+
import { readdir as readdir9 } from "fs/promises";
|
|
3122
|
+
import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3123
|
+
/**
 * Build a JSON resource content entry for `uri`.
 *
 * @param {URL} uri - Resource URI; its `href` is used as the identifier.
 * @param {*} payload - JSON-serialisable value, pretty-printed with 2 spaces.
 * @returns {{uri: string, mimeType: string, text: string}}
 */
function jsonContent(uri, payload) {
  const { href } = uri;
  const text = JSON.stringify(payload, null, 2);
  return { uri: href, mimeType: "application/json", text };
}
|
|
3130
|
+
/**
 * Build a markdown resource content entry for `uri`.
 *
 * @param {URL} uri - Resource URI; its `href` is used as the identifier.
 * @param {string} text - Markdown text, passed through unchanged.
 * @returns {{uri: string, mimeType: string, text: string}}
 */
function markdownContent(uri, text) {
  const entry = { uri: uri.href, mimeType: "text/markdown" };
  entry.text = text;
  return entry;
}
|
|
3137
|
+
/**
 * Register all wiki resources on `server`, in a fixed order.
 *
 * @param {object} server - Server instance exposing `registerResource`.
 * @param {string} root - Workspace root passed to every resource.
 */
function registerWikiResources(server, root) {
  const registrars = [
    registerIndexResource,
    registerSourcesResource,
    registerStateResource,
    registerConceptResource,
    registerQueryResource
  ];
  for (const register of registrars) {
    register(server, root);
  }
}
|
|
3144
|
+
/**
 * Register the `llmwiki://index` resource, which serves the content of the
 * index file as markdown.
 *
 * @param {object} server - Server instance exposing `registerResource`.
 * @param {string} root - Workspace root.
 */
function registerIndexResource(server, root) {
  const metadata = {
    title: "Wiki Index",
    description: "Full content of wiki/index.md (auto-generated table of contents).",
    mimeType: "text/markdown"
  };
  const read = async (uri) => {
    const indexText = await safeReadFile(path22.join(root, INDEX_FILE));
    return { contents: [markdownContent(uri, indexText)] };
  };
  server.registerResource("wiki-index", "llmwiki://index", metadata, read);
}
|
|
3159
|
+
/**
 * Register the `llmwiki://sources` resource, which serves the output of
 * `listSources(root)` as JSON.
 *
 * @param {object} server - Server instance exposing `registerResource`.
 * @param {string} root - Workspace root.
 */
function registerSourcesResource(server, root) {
  const metadata = {
    title: "Wiki Sources",
    description: "List of ingested source files with frontmatter metadata.",
    mimeType: "application/json"
  };
  const read = async (uri) => {
    const sources = await listSources(root);
    return { contents: [jsonContent(uri, sources)] };
  };
  server.registerResource("wiki-sources", "llmwiki://sources", metadata, read);
}
|
|
3173
|
+
/**
 * Register the `llmwiki://state` resource, which serves the value returned
 * by `readState(root)` as JSON.
 *
 * @param {object} server - Server instance exposing `registerResource`.
 * @param {string} root - Workspace root.
 */
function registerStateResource(server, root) {
  const metadata = {
    title: "Compilation State",
    description: "Per-source hashes, concepts, and last compile times from .llmwiki/state.json.",
    mimeType: "application/json"
  };
  const read = async (uri) => ({
    contents: [jsonContent(uri, await readState(root))]
  });
  server.registerResource("wiki-state", "llmwiki://state", metadata, read);
}
|
|
3188
|
+
/**
 * Register the `llmwiki://concept/{slug}` resource template. Listing is
 * delegated to `listPagesUnder`; reading returns the page's metadata and
 * body from the concepts directory as JSON.
 *
 * @param {object} server - Server instance exposing `registerResource`.
 * @param {string} root - Workspace root.
 */
function registerConceptResource(server, root) {
  const template = new ResourceTemplate("llmwiki://concept/{slug}", {
    list: async () => listPagesUnder(root, CONCEPTS_DIR, "concept")
  });
  const metadata = {
    title: "Wiki Concept",
    description: "A single concept page from wiki/concepts/ \u2014 frontmatter plus body.",
    mimeType: "application/json"
  };
  const read = async (uri, { slug }) => {
    const page = await loadPageWithMeta(root, CONCEPTS_DIR, String(slug));
    return { contents: [jsonContent(uri, page)] };
  };
  server.registerResource("wiki-concept", template, metadata, read);
}
|
|
3204
|
+
/**
 * Registers the templated `llmwiki://query/{slug}` resource.
 *
 * Listing enumerates the pages under QUERIES_DIR via `listPagesUnder`;
 * reading loads one page via `loadPageWithMeta` and returns it as JSON content.
 *
 * @param server MCP server exposing `registerResource`.
 * @param root Project root directory.
 */
function registerQueryResource(server, root) {
  const template = new ResourceTemplate("llmwiki://query/{slug}", {
    list: async () => listPagesUnder(root, QUERIES_DIR, "query")
  });
  const metadata = {
    title: "Wiki Query",
    description: "A single saved query page from wiki/queries/ \u2014 frontmatter plus body.",
    mimeType: "application/json"
  };
  const readQuery = async (uri, variables) => {
    // The template variable is coerced to a string before it is used as a slug.
    const page = await loadPageWithMeta(root, QUERIES_DIR, String(variables.slug));
    return { contents: [jsonContent(uri, page)] };
  };
  server.registerResource("wiki-query", template, metadata, readQuery);
}
|
|
3220
|
+
/**
 * Collects one record per `.md` file found directly under SOURCES_DIR.
 *
 * Each record is `{ filename, ...meta }`, where `meta` is the frontmatter
 * returned by `parseFrontmatter`. Because the frontmatter is spread after
 * `filename`, a frontmatter key named `filename` takes precedence.
 *
 * @param root Project root directory.
 * @returns Array of records; empty when the sources directory cannot be read.
 */
async function listSources(root) {
  const sourcesDir = path22.join(root, SOURCES_DIR);
  let entries;
  try {
    entries = await readdir9(sourcesDir);
  } catch {
    // An unreadable or missing directory is reported as "no sources".
    return [];
  }
  const markdownFiles = entries.filter((name) => name.endsWith(".md"));
  const sources = [];
  // Files are read one at a time, in directory-listing order.
  for (const filename of markdownFiles) {
    const raw = await safeReadFile(path22.join(sourcesDir, filename));
    const parsed = parseFrontmatter(raw);
    sources.push({ filename, ...parsed.meta });
  }
  return sources;
}
|
|
3236
|
+
/**
 * Loads a single wiki page and splits it into frontmatter and body.
 *
 * The slug can originate from a client-supplied resource URI
 * (`llmwiki://concept/{slug}`, `llmwiki://query/{slug}`), so it is treated as
 * untrusted: a slug whose resolved file path falls outside `<root>/<dir>`
 * (for example one containing `../` segments) is rejected before any file is
 * read.
 *
 * @param root Project root directory.
 * @param dir Pages directory, relative to `root`.
 * @param slug Page identifier; the file read is `<root>/<dir>/<slug>.md`.
 * @returns `{ slug, meta, body }` with `body` trimmed.
 * @throws {Error} "Invalid page slug" when the slug escapes the pages directory.
 * @throws {Error} "Page not found" when the file is missing or empty.
 */
async function loadPageWithMeta(root, dir, slug) {
  const pagesDir = path22.join(root, dir);
  const filePath = path22.join(root, dir, `${slug}.md`);
  // Containment check on the exact path that would be read.
  const relativePath = path22.relative(pagesDir, filePath);
  const escapesPagesDir = relativePath === "" || relativePath === ".." || relativePath.startsWith(`..${path22.sep}`) || path22.isAbsolute(relativePath);
  if (escapesPagesDir) {
    throw new Error(`Invalid page slug: ${slug}`);
  }
  const content = await safeReadFile(filePath);
  if (!content) {
    // safeReadFile's result is falsy here, so a missing and an empty file look the same.
    throw new Error(`Page not found: ${dir}/${slug}.md`);
  }
  const { meta, body } = parseFrontmatter(content);
  return { slug, meta, body: body.trim() };
}
|
|
3245
|
+
/**
 * Builds the resource listing for every `.md` file directly under `<root>/<dir>`.
 *
 * @param root Project root directory.
 * @param dir Pages directory, relative to `root`.
 * @param scheme Path segment used in the generated URI (`llmwiki://<scheme>/<slug>`).
 * @returns `{ resources }`, each entry `{ uri, name }`; the list is empty when
 *   the directory cannot be read.
 */
async function listPagesUnder(root, dir, scheme) {
  const pagesDir = path22.join(root, dir);
  let entries;
  try {
    entries = await readdir9(pagesDir);
  } catch {
    // An unreadable or missing directory is reported as an empty listing.
    return { resources: [] };
  }
  const resources = [];
  for (const entry of entries) {
    if (!entry.endsWith(".md")) {
      continue;
    }
    // The slug is the file name without its trailing ".md".
    const slug = entry.replace(/\.md$/, "");
    resources.push({ uri: `llmwiki://${scheme}/${slug}`, name: slug });
  }
  return { resources };
}
|
|
3259
|
+
|
|
3260
|
+
// src/mcp/server.ts
|
|
3261
|
+
/**
 * Creates the "llmwiki" MCP server, registers the wiki tools and resources on
 * it, and connects it to a stdio transport.
 *
 * @param options `{ root, version }`: project root directory and the version
 *   string reported by the server.
 */
async function startMCPServer(options) {
  const root = options.root;
  const serverInfo = { name: "llmwiki", version: options.version };
  const serverOptions = {
    instructions: "llmwiki is a knowledge compiler. Use ingest_source to add raw sources, compile_wiki to run the LLM pipeline, query_wiki for grounded answers, and search_pages to retrieve relevant pages. read_page, lint_wiki, and wiki_status work without an API key."
  };
  const server = new McpServer2(serverInfo, serverOptions);
  registerWikiTools(server, root);
  registerWikiResources(server, root);
  const transport = new StdioServerTransport();
  await server.connect(transport);
}
|
|
3271
|
+
|
|
1353
3272
|
// src/cli.ts
|
|
1354
3273
|
// Build a require() bound to this module's URL and read the package version
// from ../package.json with it.
var require2 = createRequire(import.meta.url);
var version = require2("../package.json").version;
|
|
@@ -1363,18 +3282,54 @@ program.command("ingest <source>").description("Ingest a URL or local file into
|
|
|
1363
3282
|
process.exit(1);
|
|
1364
3283
|
}
|
|
1365
3284
|
});
|
|
1366
|
-
program.command("compile").description("Compile sources/ into an interlinked wiki").
|
|
1367
|
-
|
|
3285
|
+
// `compile`: checks provider credentials, then runs compileCommand, forwarding
// the --review flag. Any thrown error is printed in red and the process exits 1.
program
  .command("compile")
  .description("Compile sources/ into an interlinked wiki")
  .option(
    "--review",
    "Write generated pages as review candidates under .llmwiki/candidates/ instead of mutating wiki/. Orphan-marking for deleted sources is deferred until the next non-review compile."
  )
  .action(async (options) => {
    try {
      requireProvider();
      await compileCommand({ review: options.review });
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
3297
|
+
// Parent `review` command; the list/show/approve/reject subcommands are attached to it.
var reviewCommand = program.command("review").description("Inspect and act on pending compile review candidates");
|
|
3298
|
+
// `review list`: runs reviewListCommand; errors are printed in red and exit 1.
reviewCommand
  .command("list")
  .description("List pending review candidates")
  .action(async () => {
    try {
      await reviewListCommand();
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
3306
|
+
// `review show <id>`: runs reviewShowCommand(id); errors are printed in red and exit 1.
reviewCommand
  .command("show <id>")
  .description("Print a single candidate's metadata and body")
  .action(async (id) => {
    try {
      await reviewShowCommand(id);
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
3314
|
+
// `review approve <id>`: runs reviewApproveCommand(id); errors are printed in red and exit 1.
reviewCommand
  .command("approve <id>")
  .description("Approve a candidate and promote it into wiki/concepts/")
  .action(async (id) => {
    try {
      await reviewApproveCommand(id);
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
3322
|
+
// `review reject <id>`: runs reviewRejectCommand(id); errors are printed in red and exit 1.
reviewCommand
  .command("reject <id>")
  .description("Reject a candidate and archive it without touching wiki/")
  .action(async (id) => {
    try {
      await reviewRejectCommand(id);
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
1375
3330
|
program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").action(async (question, options) => {
|
|
1376
|
-
requireApiKey();
|
|
1377
3331
|
try {
|
|
3332
|
+
requireProvider();
|
|
1378
3333
|
await queryCommand(process.cwd(), question, options);
|
|
1379
3334
|
} catch (err) {
|
|
1380
3335
|
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
@@ -1382,21 +3337,64 @@ program.command("query <question>").description("Ask a question against the wiki
|
|
|
1382
3337
|
}
|
|
1383
3338
|
});
|
|
1384
3339
|
// `watch`: checks provider credentials inside the try block, then runs
// watchCommand. Any thrown error is printed in red and the process exits 1.
program
  .command("watch")
  .description("Watch sources/ and auto-recompile on changes")
  .action(async () => {
    try {
      requireProvider();
      await watchCommand();
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
1393
|
-
program.
|
|
1394
|
-
|
|
1395
|
-
|
|
3348
|
+
// `lint`: runs lintCommand without a provider check; errors are printed in red and exit 1.
program
  .command("lint")
  .description("Run rule-based quality checks against the wiki")
  .action(async () => {
    try {
      await lintCommand();
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
3356
|
+
// `serve`: starts the MCP server for the directory given by --root (default:
// the current working directory). Errors go to stderr in red and exit 1.
program
  .command("serve")
  .description("Start an MCP server exposing wiki tools and resources over stdio")
  .option("--root <dir>", "Project root directory", process.cwd())
  .action(async (options) => {
    try {
      await startMCPServer({ root: options.root, version });
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
3364
|
+
// Maps each supported provider name to the environment variable holding its
// API key; `null` means the provider needs no key.
//
// The table has no prototype and is frozen, so a lookup with an arbitrary
// user-supplied name (e.g. "constructor" or "toString") yields `undefined`
// instead of an inherited Object.prototype member, and entries cannot be
// changed at runtime.
var PROVIDER_KEY_VARS2 = Object.freeze(
  Object.assign(/* @__PURE__ */ Object.create(null), {
    anthropic: "ANTHROPIC_API_KEY",
    openai: "OPENAI_API_KEY",
    ollama: null,
    minimax: "MINIMAX_API_KEY"
  })
);
|
|
3370
|
+
/**
 * Verifies that credentials for the selected LLM provider are available,
 * printing an error and exiting with status 1 when they are not.
 *
 * The provider is read from LLMWIKI_PROVIDER and falls back to
 * DEFAULT_PROVIDER. For "anthropic", either an API key or an auth token (as
 * resolved by `resolveAnthropicAuthFromEnv`) is accepted. Other providers are
 * looked up in PROVIDER_KEY_VARS2; a `null` entry means no key is needed.
 */
function requireProvider() {
  const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
  if (provider === "anthropic") {
    const auth = resolveAnthropicAuthFromEnv();
    if (!auth.apiKey && !auth.authToken) {
      console.error(
        `\x1B[31mError:\x1B[0m Anthropic credentials are required for the "anthropic" provider.\nSet one of: export ANTHROPIC_API_KEY=<your-key> OR export ANTHROPIC_AUTH_TOKEN=<your-token>`
      );
      process.exit(1);
    }
    return;
  }
  // Own-property check: a plain bracket lookup would also match inherited keys
  // such as "constructor" and let an unsupported provider name slip past the
  // "unknown provider" branch.
  const isKnownProvider = Object.prototype.hasOwnProperty.call(PROVIDER_KEY_VARS2, provider);
  const keyVar = isKnownProvider ? PROVIDER_KEY_VARS2[provider] : void 0;
  if (keyVar === void 0) {
    console.error(
      `\x1B[31mError:\x1B[0m Unknown provider "${provider}".\nSupported: ${Object.keys(PROVIDER_KEY_VARS2).join(", ")}`
    );
    process.exit(1);
  }
  if (keyVar && !process.env[keyVar]) {
    console.error(
      `\x1B[31mError:\x1B[0m ${keyVar} environment variable is required for the "${provider}" provider.\nSet it with: export ${keyVar}=<your-key>`
    );
    process.exit(1);
  }
}
|
|
3399
|
+
// Entry point: every command above is registered before parsing starts.
// NOTE(review): `program` looks like a commander-style CLI object, which would
// parse process.argv and run the matching action here — confirm.
program.parse();
|
|
1402
3400
|
//# sourceMappingURL=cli.js.map
|