llm-wiki-compiler 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -11
- package/dist/cli.js +1610 -248
- package/dist/cli.js.map +1 -1
- package/package.json +5 -2
package/dist/cli.js
CHANGED
|
@@ -66,7 +66,14 @@ var COMPILE_CONCURRENCY = 5;
|
|
|
66
66
|
var RETRY_COUNT = 3;
|
|
67
67
|
var RETRY_BASE_MS = 1e3;
|
|
68
68
|
var RETRY_MULTIPLIER = 4;
|
|
69
|
-
var
|
|
69
|
+
// Provider selected when the LLMWIKI_PROVIDER environment variable is absent.
var DEFAULT_PROVIDER = "anthropic";

// Default chat model for each supported provider, keyed by provider name.
var PROVIDER_MODELS = {
  anthropic: "claude-sonnet-4-20250514",
  openai: "gpt-4o",
  ollama: "llama3.1",
  minimax: "MiniMax-M2.7"
};

// Base URL handed to the Ollama provider when OLLAMA_HOST is not set.
var OLLAMA_DEFAULT_HOST = "http://localhost:11434/v1";
|
|
70
77
|
var SOURCES_DIR = "sources";
|
|
71
78
|
var CONCEPTS_DIR = "wiki/concepts";
|
|
72
79
|
var QUERIES_DIR = "wiki/queries";
|
|
@@ -74,6 +81,14 @@ var LLMWIKI_DIR = ".llmwiki";
|
|
|
74
81
|
var STATE_FILE = ".llmwiki/state.json";
|
|
75
82
|
var LOCK_FILE = ".llmwiki/lock";
|
|
76
83
|
var INDEX_FILE = "wiki/index.md";
|
|
84
|
+
// Project-relative locations of the generated map-of-content page and the
// embeddings store.
var MOC_FILE = "wiki/MOC.md";
var EMBEDDINGS_FILE = ".llmwiki/embeddings.json";

// NOTE(review): presumably the number of nearest neighbours kept per semantic
// lookup — the consumer is outside this view, confirm there.
var EMBEDDING_TOP_K = 15;

// Embedding model per provider. There is no entry for "minimax".
var EMBEDDING_MODELS = {
  anthropic: "voyage-3-lite",
  openai: "text-embedding-3-small",
  ollama: "nomic-embed-text"
};
|
|
77
92
|
|
|
78
93
|
// src/utils/output.ts
|
|
79
94
|
var RESET = "\x1B[0m";
|
|
@@ -82,7 +97,6 @@ var DIM = "\x1B[2m";
|
|
|
82
97
|
var GREEN = "\x1B[32m";
|
|
83
98
|
var YELLOW = "\x1B[33m";
|
|
84
99
|
var BLUE = "\x1B[34m";
|
|
85
|
-
var MAGENTA = "\x1B[35m";
|
|
86
100
|
var CYAN = "\x1B[36m";
|
|
87
101
|
var RED = "\x1B[31m";
|
|
88
102
|
function bold(text) {
|
|
@@ -103,9 +117,6 @@ function info(text) {
|
|
|
103
117
|
function error(text) {
|
|
104
118
|
return `${RED}${text}${RESET}`;
|
|
105
119
|
}
|
|
106
|
-
function concept(text) {
|
|
107
|
-
return `${MAGENTA}${BOLD}${text}${RESET}`;
|
|
108
|
-
}
|
|
109
120
|
function source(text) {
|
|
110
121
|
return `${CYAN}${text}${RESET}`;
|
|
111
122
|
}
|
|
@@ -238,26 +249,36 @@ async function saveSource(title, document) {
|
|
|
238
249
|
await writeFile2(destPath, document, "utf-8");
|
|
239
250
|
return destPath;
|
|
240
251
|
}
|
|
241
|
-
async function
|
|
252
|
+
/**
 * Fetch a source (URL or local file), apply the size checks, save it, and
 * describe what was written.
 *
 * @param {string} source2 - URL or file path to ingest.
 * @returns {Promise<{filename: string, charCount: number, truncated: boolean, source: string}>}
 *   Base name of the saved file, length of the (possibly truncated) content,
 *   the truncation flag reported by enforceCharLimit, and the original source.
 */
async function ingestSource(source2) {
  status("*", info(`Ingesting: ${source2}`));

  let fetched;
  if (isUrl(source2)) {
    fetched = await ingestWeb(source2);
  } else {
    fetched = await ingestFile(source2);
  }

  const limited = enforceCharLimit(fetched.content);
  // Runs on the already-limited content, before anything is written.
  enforceMinContent(limited.content);

  const markdown = buildDocument(fetched.title, source2, limited);
  const destination = await saveSource(fetched.title, markdown);

  return {
    filename: path3.basename(destination),
    charCount: limited.content.length,
    truncated: limited.truncated,
    source: source2
  };
}
|
|
266
|
+
/**
 * CLI entry point for ingesting: ingests one source and prints where it was
 * saved plus the suggested next command.
 *
 * @param {string} source2 - URL or file path to ingest.
 * @returns {Promise<void>}
 */
async function ingest(source2) {
  const { filename } = await ingestSource(source2);
  const relativePath = path3.join(SOURCES_DIR, filename);
  const savedMessage = success(`Saved ${bold(filename)} \u2192 ${source(relativePath)}`);
  status("+", savedMessage);
  status("\u2192", dim("Next: llmwiki compile"));
}
|
|
254
275
|
|
|
255
276
|
// src/commands/compile.ts
|
|
256
|
-
import { existsSync as
|
|
277
|
+
import { existsSync as existsSync4 } from "fs";
|
|
257
278
|
|
|
258
279
|
// src/compiler/index.ts
|
|
259
|
-
import { readFile as
|
|
260
|
-
import
|
|
280
|
+
import { readFile as readFile8, readdir as readdir6 } from "fs/promises";
|
|
281
|
+
import path13 from "path";
|
|
261
282
|
|
|
262
283
|
// src/utils/state.ts
|
|
263
284
|
import { readFile as readFile3, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
|
|
@@ -300,30 +321,387 @@ async function removeSourceState(root, sourceFile) {
|
|
|
300
321
|
await writeState(root, state);
|
|
301
322
|
}
|
|
302
323
|
|
|
303
|
-
// src/
|
|
324
|
+
// src/providers/anthropic.ts
|
|
304
325
|
import Anthropic from "@anthropic-ai/sdk";
|
|
305
|
-
var
|
|
306
|
-
function
|
|
307
|
-
|
|
308
|
-
|
|
326
|
+
var VOYAGE_EMBEDDINGS_URL = "https://api.voyageai.com/v1/embeddings";
|
|
327
|
+
/**
 * Build the options object passed to the Anthropic SDK constructor.
 *
 * Every value is trimmed; blank values are omitted so the SDK falls back to
 * its own defaults. All trailing slashes are removed from the base URL
 * (e.g. "https://proxy/api//" becomes "https://proxy/api"), except that a
 * URL consisting only of slashes is reduced to a single "/".
 *
 * @param {{baseURL?: string, apiKey?: string, authToken?: string}} [options]
 * @returns {{baseURL?: string, apiKey?: string, authToken?: string}}
 */
function buildAnthropicClientOptions(options = {}) {
  const trimmedBaseURL = options.baseURL?.trim();
  const trimmedApiKey = options.apiKey?.trim();
  const trimmedAuthToken = options.authToken?.trim();
  const result = {};
  if (trimmedApiKey) {
    result.apiKey = trimmedApiKey;
  }
  if (trimmedAuthToken) {
    result.authToken = trimmedAuthToken;
  }
  if (!trimmedBaseURL) {
    return result;
  }
  // Walk back over every trailing "/", always keeping at least one character.
  let end = trimmedBaseURL.length;
  while (end > 1 && trimmedBaseURL[end - 1] === "/") {
    end -= 1;
  }
  result.baseURL = trimmedBaseURL.slice(0, end);
  return result;
}
|
|
345
|
+
/**
 * LLM provider backed by the Anthropic Messages API, with embeddings
 * delegated to the Voyage HTTP API.
 */
var AnthropicProvider = class {
  client;
  model;

  /**
   * @param {string} model - Model id sent with every request.
   * @param {{baseURL?: string, apiKey?: string, authToken?: string}} [options]
   *   Client options; cleaned by buildAnthropicClientOptions before use.
   */
  constructor(model, options = {}) {
    this.model = model;
    const clientOptions = buildAnthropicClientOptions(options);
    this.client = new Anthropic(clientOptions);
  }

  /**
   * Send a single non-streaming completion request.
   * @returns {Promise<string>} Text of the first text block, or "" if none.
   */
  async complete(system, messages, maxTokens) {
    const request = {
      model: this.model,
      max_tokens: maxTokens,
      system,
      messages
    };
    const { content } = await this.client.messages.create(request);
    const firstText = content.find((block) => block.type === "text");
    if (firstText?.type === "text") {
      return firstText.text;
    }
    return "";
  }

  /**
   * Stream a completion, invoking onToken for each text chunk.
   * @returns {Promise<string>} Concatenation of every streamed text chunk.
   */
  async stream(system, messages, maxTokens, onToken) {
    const events = this.client.messages.stream({
      model: this.model,
      max_tokens: maxTokens,
      system,
      messages
    });
    let collected = "";
    for await (const event of events) {
      // Only text deltas carry output text; every other event is skipped.
      if (event.type !== "content_block_delta") continue;
      if (event.delta.type !== "text_delta") continue;
      collected += event.delta.text;
      onToken?.(event.delta.text);
    }
    return collected;
  }

  /**
   * Call Claude with tool definitions and return the tool input as a JSON string.
   * Falls back to the first text block, then to "", when no tool was used.
   */
  async toolCall(system, messages, tools, maxTokens) {
    // Forward only the three fields copied below from each tool definition.
    const anthropicTools = tools.map(({ name, description, input_schema }) => ({
      name,
      description,
      input_schema
    }));
    const { content } = await this.client.messages.create({
      model: this.model,
      max_tokens: maxTokens,
      system,
      messages,
      tools: anthropicTools
    });
    const toolUse = content.find((block) => block.type === "tool_use");
    if (toolUse?.type === "tool_use") {
      return JSON.stringify(toolUse.input);
    }
    const firstText = content.find((block) => block.type === "text");
    return firstText?.type === "text" ? firstText.text : "";
  }

  /**
   * Produce a single embedding vector via the Voyage API.
   *
   * Anthropic does not ship a first-party embeddings endpoint, so we delegate
   * to Voyage (their recommended partner). Requires VOYAGE_API_KEY.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<number[]>} The first embedding in the response.
   * @throws {Error} If the key is missing, the request fails, or the response
   *   carries no vector.
   */
  async embed(text) {
    const apiKey = process.env.VOYAGE_API_KEY?.trim();
    if (!apiKey) {
      throw new Error(
        "VOYAGE_API_KEY is not set. Anthropic embeddings use Voyage \u2014 set VOYAGE_API_KEY to enable semantic search."
      );
    }
    const payload = JSON.stringify({ input: text, model: EMBEDDING_MODELS.anthropic });
    const response = await fetch(VOYAGE_EMBEDDINGS_URL, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${apiKey}`
      },
      body: payload
    });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`Voyage embeddings request failed (${response.status}): ${detail}`);
    }
    const body = await response.json();
    const vector = body.data?.[0]?.embedding;
    if (Array.isArray(vector)) {
      return vector;
    }
    throw new Error("Voyage embeddings response did not include a vector.");
  }
};
|
|
434
|
+
|
|
435
|
+
// src/providers/openai.ts
|
|
436
|
+
import OpenAI from "openai";
|
|
437
|
+
/**
 * Convert a tool definition ({name, description, input_schema}) into the
 * OpenAI "function" tool shape, where the schema is carried as `parameters`.
 *
 * @param {{name: string, description: string, input_schema: object}} tool
 * @returns {{type: "function", function: {name: string, description: string, parameters: object}}}
 */
function translateToolToOpenAI(tool) {
  const { name, description, input_schema: parameters } = tool;
  return {
    type: "function",
    function: { name, description, parameters }
  };
}
|
|
447
|
+
/**
 * LLM provider for the OpenAI chat-completions API. It also serves as the
 * base class for other providers that talk to OpenAI-compatible endpoints.
 */
var OpenAIProvider = class {
  client;
  model;

  /**
   * @param {string} model - Model id sent with every chat request.
   * @param {string} [baseURL] - Endpoint override; ignored when falsy.
   * @param {string} [apiKey] - Falls back to OPENAI_API_KEY, then to "".
   */
  constructor(model, baseURL, apiKey) {
    this.model = model;
    const clientOptions = { apiKey: apiKey ?? process.env.OPENAI_API_KEY ?? "" };
    if (baseURL) {
      clientOptions.baseURL = baseURL;
    }
    this.client = new OpenAI(clientOptions);
  }

  /**
   * Send a single non-streaming completion request.
   * @returns {Promise<string>} Content of the first choice, or "".
   */
  async complete(system, messages, maxTokens) {
    // The system prompt goes in as the first chat message.
    const conversation = [{ role: "system", content: system }, ...messages];
    const response = await this.client.chat.completions.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: conversation
    });
    const [firstChoice] = response.choices;
    return firstChoice?.message?.content ?? "";
  }

  /**
   * Stream a completion, invoking onToken for each text chunk.
   * @returns {Promise<string>} Concatenation of every non-empty delta.
   */
  async stream(system, messages, maxTokens, onToken) {
    const conversation = [{ role: "system", content: system }, ...messages];
    const chunks = await this.client.chat.completions.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: conversation,
      stream: true
    });
    let collected = "";
    for await (const chunk of chunks) {
      const piece = chunk.choices[0]?.delta?.content;
      if (!piece) continue;
      collected += piece;
      onToken?.(piece);
    }
    return collected;
  }

  /**
   * Call the model with tool definitions and return the first tool call's
   * arguments string; falls back to the message content, then to "".
   */
  async toolCall(system, messages, tools, maxTokens) {
    const conversation = [{ role: "system", content: system }, ...messages];
    const response = await this.client.chat.completions.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: conversation,
      tools: tools.map(translateToolToOpenAI)
    });
    const reply = response.choices[0]?.message;
    const requestedCalls = reply?.tool_calls;
    if (requestedCalls?.length > 0) {
      return requestedCalls[0].function.arguments;
    }
    return reply?.content ?? "";
  }

  /**
   * Produce a single embedding vector via the OpenAI embeddings API.
   * Subclasses (e.g. Ollama) override embeddingModel() to pick a different model.
   *
   * @throws {Error} If the response carries no embedding array.
   */
  async embed(text) {
    const request = { model: this.embeddingModel(), input: text };
    const { data } = await this.client.embeddings.create(request);
    const vector = data[0]?.embedding;
    if (Array.isArray(vector)) {
      return vector;
    }
    throw new Error("OpenAI embeddings response did not include a vector.");
  }

  /** Default embedding model for this provider. Subclasses may override. */
  embeddingModel() {
    const { openai } = EMBEDDING_MODELS;
    return openai;
  }
};
|
|
520
|
+
|
|
521
|
+
// src/providers/ollama.ts
|
|
522
|
+
/**
 * Ollama provider: reuses the OpenAI provider against an Ollama base URL and
 * swaps in Ollama's embedding model.
 */
var OllamaProvider = class extends OpenAIProvider {
  /**
   * @param {string} model - Chat model id.
   * @param {string} baseURL - Ollama endpoint.
   */
  constructor(model, baseURL) {
    // Fixed placeholder API key — presumably the local server ignores it; confirm.
    const placeholderKey = "ollama";
    super(model, baseURL, placeholderKey);
  }

  /** Ollama ships a dedicated embedding model (nomic-embed-text). */
  embeddingModel() {
    const { ollama } = EMBEDDING_MODELS;
    return ollama;
  }
};
|
|
531
|
+
|
|
532
|
+
// src/providers/minimax.ts
|
|
533
|
+
// Endpoint passed to the OpenAI client for MiniMax requests.
var MINIMAX_BASE_URL = "https://api.minimax.io/v1";

/**
 * MiniMax provider: the OpenAI provider pointed at MINIMAX_BASE_URL.
 *
 * NOTE(review): embeddingModel() is inherited unchanged, so embed() asks this
 * endpoint for EMBEDDING_MODELS.openai — confirm MiniMax actually serves it.
 */
var MiniMaxProvider = class extends OpenAIProvider {
  /**
   * @param {string} model - Chat model id.
   * @param {string} apiKey - MiniMax API key.
   */
  constructor(model, apiKey) {
    const endpoint = MINIMAX_BASE_URL;
    super(model, endpoint, apiKey);
  }
};
|
|
539
|
+
|
|
540
|
+
// src/utils/claude-settings.ts
|
|
541
|
+
import { readFileSync } from "fs";
|
|
542
|
+
import { homedir } from "os";
|
|
543
|
+
import path5 from "path";
|
|
544
|
+
var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
|
|
545
|
+
/**
 * Report whether a value is a non-null object (arrays included).
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
|
|
548
|
+
/**
 * Trim a string setting and collapse "missing" forms to undefined.
 *
 * @param {unknown} value
 * @returns {string | undefined} The trimmed string, or undefined when the
 *   value is not a string or is blank after trimming.
 */
function normalize(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? void 0 : trimmed;
}
|
|
553
|
+
/**
 * Locate the Claude settings file: the path named by the
 * CLAUDE_SETTINGS_PATH_ENV variable when present, otherwise
 * ~/.claude/settings.json.
 *
 * @param {Record<string, string | undefined>} env - Environment to read.
 * @returns {string}
 */
function resolveClaudeSettingsPath(env) {
  const override = env[CLAUDE_SETTINGS_PATH_ENV];
  if (override !== void 0 && override !== null) {
    // Any defined value wins, including an empty string.
    return override;
  }
  return path5.join(homedir(), ".claude", "settings.json");
}
|
|
556
|
+
/**
 * Read the Claude settings file as UTF-8 text.
 *
 * @param {string} settingsPath - File to read.
 * @returns {string | undefined} File contents, or undefined when the file
 *   does not exist (ENOENT).
 * @throws {Error} For any other read failure; the underlying error is kept
 *   on `cause` so its code and stack are not lost.
 */
function readClaudeSettingsFile(settingsPath) {
  try {
    return readFileSync(settingsPath, "utf8");
  } catch (err) {
    // A missing settings file is an expected condition, not a failure.
    if (isRecord(err) && err.code === "ENOENT") {
      return void 0;
    }
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to read Claude settings at "${settingsPath}": ${message}`, { cause: err });
  }
}
|
|
567
|
+
/**
 * Load the Anthropic-related entries from the `env` object of the Claude
 * settings file.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment
 *   used to locate the settings file.
 * @returns {{ANTHROPIC_API_KEY?: string, ANTHROPIC_AUTH_TOKEN?: string, ANTHROPIC_BASE_URL?: string, ANTHROPIC_MODEL?: string} | undefined}
 *   Normalized values, or undefined when the file is missing or empty, has no
 *   `env` object, or none of the four keys has a non-blank string value.
 * @throws {Error} When the file cannot be read or is not valid JSON; the
 *   parse error is kept on `cause`.
 */
function readClaudeSettingsEnv(env = process.env) {
  const settingsPath = resolveClaudeSettingsPath(env);
  const raw = readClaudeSettingsFile(settingsPath);
  if (!raw) return void 0;

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse Claude settings at "${settingsPath}": ${message}`, { cause: err });
  }

  if (!isRecord(parsed) || !isRecord(parsed.env)) {
    return void 0;
  }

  const values = {
    ANTHROPIC_API_KEY: normalize(parsed.env.ANTHROPIC_API_KEY),
    ANTHROPIC_AUTH_TOKEN: normalize(parsed.env.ANTHROPIC_AUTH_TOKEN),
    ANTHROPIC_BASE_URL: normalize(parsed.env.ANTHROPIC_BASE_URL),
    ANTHROPIC_MODEL: normalize(parsed.env.ANTHROPIC_MODEL)
  };
  // normalize() yields a non-empty string or undefined, so "any defined"
  // is the same as "any usable".
  const hasAnyValue = Object.values(values).some((value) => value !== void 0);
  return hasAnyValue ? values : void 0;
}
|
|
592
|
+
/**
 * Best-effort variant of readClaudeSettingsEnv: any error thrown while
 * reading is deliberately swallowed and reported as "no settings".
 *
 * @param {Record<string, string | undefined>} [env]
 * @returns {object | undefined} The settings values, or undefined on failure.
 */
function tryReadClaudeSettingsEnv(env) {
  let settings;
  try {
    settings = readClaudeSettingsEnv(env);
  } catch {
    settings = void 0;
  }
  return settings;
}
|
|
599
|
+
/**
 * Check that a base URL parses and uses http or https.
 *
 * @param {string} value - Candidate URL; surrounding whitespace is ignored.
 * @returns {string} The trimmed URL.
 * @throws {Error} `Invalid ANTHROPIC_BASE_URL: "<url>". <reason>` when the
 *   URL cannot be parsed or uses another protocol.
 */
function validateAnthropicBaseURL(value) {
  const candidate = value.trim();

  let protocol;
  try {
    ({ protocol } = new URL(candidate));
  } catch (err) {
    const reason = err instanceof Error ? err.message : "Must be a valid http(s) URL.";
    throw new Error(`Invalid ANTHROPIC_BASE_URL: "${candidate}". ${reason}`);
  }

  const isHttp = protocol === "http:" || protocol === "https:";
  if (!isHttp) {
    throw new Error(`Invalid ANTHROPIC_BASE_URL: "${candidate}". Must use http:// or https:// protocol.`);
  }
  return candidate;
}
|
|
612
|
+
/**
 * Pick the Anthropic credential to use, in priority order:
 * env ANTHROPIC_API_KEY, env ANTHROPIC_AUTH_TOKEN, then the same two keys
 * from the Claude settings file. The settings file is only read when the
 * environment supplies neither, and read errors propagate to the caller.
 *
 * @param {Record<string, string | undefined>} [env=process.env]
 * @returns {{apiKey: string} | {authToken: string} | {}} Empty when nothing is configured.
 */
function resolveAnthropicAuthFromEnv(env = process.env) {
  const envApiKey = normalize(env.ANTHROPIC_API_KEY);
  if (envApiKey) {
    return { apiKey: envApiKey };
  }
  const envAuthToken = normalize(env.ANTHROPIC_AUTH_TOKEN);
  if (envAuthToken) {
    return { authToken: envAuthToken };
  }

  const settings = readClaudeSettingsEnv(env) ?? {};
  const { ANTHROPIC_API_KEY: settingsApiKey, ANTHROPIC_AUTH_TOKEN: settingsAuthToken } = settings;
  if (settingsApiKey) {
    return { apiKey: settingsApiKey };
  }
  if (settingsAuthToken) {
    return { authToken: settingsAuthToken };
  }
  return {};
}
|
|
622
|
+
/**
 * Resolve the Anthropic model override: LLMWIKI_MODEL from the environment,
 * otherwise ANTHROPIC_MODEL from the Claude settings file (read best-effort).
 *
 * LLMWIKI_MODEL is normalized like the other Anthropic settings, so a blank
 * or whitespace-only value counts as unset instead of being sent to the API
 * as an empty model id.
 *
 * @param {Record<string, string | undefined>} [env=process.env]
 * @returns {string | undefined} The override, or undefined when none is set.
 */
function resolveAnthropicModelFromEnv(env = process.env) {
  const explicitModel = normalize(env.LLMWIKI_MODEL);
  if (explicitModel) return explicitModel;
  return tryReadClaudeSettingsEnv(env)?.ANTHROPIC_MODEL;
}
|
|
627
|
+
/**
 * Resolve and validate the Anthropic base URL: ANTHROPIC_BASE_URL from the
 * environment, otherwise the same key from the Claude settings file (read
 * best-effort, and only when the environment has no usable value).
 *
 * @param {Record<string, string | undefined>} [env=process.env]
 * @returns {string | undefined} The validated URL, or undefined when unset.
 * @throws {Error} When the chosen URL fails validateAnthropicBaseURL.
 */
function resolveAnthropicBaseURLFromEnv(env = process.env) {
  const candidate =
    normalize(env.ANTHROPIC_BASE_URL) || tryReadClaudeSettingsEnv(env)?.ANTHROPIC_BASE_URL;
  return candidate ? validateAnthropicBaseURL(candidate) : void 0;
}
|
|
634
|
+
|
|
635
|
+
// src/utils/provider.ts
|
|
636
|
+
// Provider names accepted by getProviderName(); any other name is rejected there with an error.
var SUPPORTED_PROVIDERS = /* @__PURE__ */ new Set(["anthropic", "openai", "ollama", "minimax"]);
|
|
637
|
+
/**
 * Instantiate the LLM provider selected by the environment.
 *
 * For Ollama, a blank or whitespace-only OLLAMA_HOST is treated as unset.
 * The OpenAI client drops a falsy baseURL, so passing "" through would send
 * Ollama traffic to the OpenAI client's default endpoint.
 *
 * @returns {object} A provider instance for the configured provider name.
 * @throws {Error} If the provider name is unknown (from getProviderName) or
 *   has no case below.
 */
function getProvider() {
  const providerName = getProviderName();
  switch (providerName) {
    case "anthropic":
      return getAnthropicProvider();
    case "openai":
      return new OpenAIProvider(getModelForProvider("openai"));
    case "ollama": {
      const configuredHost = process.env.OLLAMA_HOST?.trim();
      return new OllamaProvider(
        getModelForProvider("ollama"),
        configuredHost || OLLAMA_DEFAULT_HOST
      );
    }
    case "minimax":
      return getMiniMaxProvider();
    default:
      // Only reachable if SUPPORTED_PROVIDERS gains a name without a case here.
      throw new Error(`Unhandled provider: ${providerName}`);
  }
}
|
|
655
|
+
/**
 * Choose the chat model for a provider: the LLMWIKI_MODEL override when it is
 * set to a non-blank value, otherwise the provider's PROVIDER_MODELS default.
 *
 * A blank override (e.g. `LLMWIKI_MODEL=`) counts as unset, so the request is
 * not sent with an empty model id.
 *
 * @param {string} providerName - Key into PROVIDER_MODELS.
 * @returns {string | undefined} Model id; undefined only when there is no
 *   override and the provider has no default.
 */
function getModelForProvider(providerName) {
  const override = process.env.LLMWIKI_MODEL?.trim();
  return override || PROVIDER_MODELS[providerName];
}
|
|
658
|
+
/**
 * Build the MiniMax provider from MINIMAX_API_KEY.
 *
 * @returns {object} A MiniMaxProvider using the model from getModelForProvider("minimax").
 * @throws {Error} When MINIMAX_API_KEY is unset or empty.
 */
function getMiniMaxProvider() {
  const { MINIMAX_API_KEY: apiKey } = process.env;
  if (apiKey) {
    return new MiniMaxProvider(getModelForProvider("minimax"), apiKey);
  }
  throw new Error(
    "MiniMax provider requires MINIMAX_API_KEY environment variable.\n Set it with: export MINIMAX_API_KEY=your_key"
  );
}
|
|
667
|
+
/**
 * Build the Anthropic provider from environment and Claude-settings values:
 * the model (falling back to PROVIDER_MODELS.anthropic), the optional base
 * URL, and whichever credential resolveAnthropicAuthFromEnv selects.
 *
 * @returns {object} A configured AnthropicProvider.
 */
function getAnthropicProvider() {
  const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
  const clientOptions = { baseURL: resolveAnthropicBaseURLFromEnv() };
  // The credential keys (apiKey or authToken) are layered on after baseURL.
  Object.assign(clientOptions, resolveAnthropicAuthFromEnv());
  return new AnthropicProvider(model, clientOptions);
}
|
|
676
|
+
/**
 * Read and validate the provider name from LLMWIKI_PROVIDER.
 *
 * The value is trimmed and lower-cased before validation, so " OpenAI "
 * selects "openai". A blank or whitespace-only value counts as unset and
 * yields DEFAULT_PROVIDER instead of an `Unknown provider ""` error.
 *
 * @returns {string} A member of SUPPORTED_PROVIDERS.
 * @throws {Error} When the configured name is not supported.
 */
function getProviderName() {
  const requested = process.env.LLMWIKI_PROVIDER?.trim().toLowerCase();
  const providerName = requested || DEFAULT_PROVIDER;
  if (!SUPPORTED_PROVIDERS.has(providerName)) {
    throw new Error(
      `Unknown provider "${providerName}". Supported: ${[...SUPPORTED_PROVIDERS].join(", ")}`
    );
  }
  return providerName;
}
|
|
685
|
+
/**
 * Return the validated name of the currently configured provider.
 * Thin wrapper over getProviderName(), including its error behaviour.
 *
 * @returns {string}
 */
function getActiveProviderName() {
  const activeProvider = getProviderName();
  return activeProvider;
}
|
|
688
|
+
|
|
689
|
+
// src/utils/llm.ts
|
|
312
690
|
/**
 * Resolve after the given delay.
 *
 * @param {number} ms - Delay in milliseconds, passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
315
693
|
async function callClaude(options) {
|
|
316
694
|
const { system, messages, tools, maxTokens = 4096, stream = false, onToken } = options;
|
|
317
|
-
const
|
|
695
|
+
const provider = getProvider();
|
|
318
696
|
for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
|
|
319
697
|
try {
|
|
320
698
|
if (stream) {
|
|
321
|
-
return await
|
|
699
|
+
return await provider.stream(system, messages, maxTokens, onToken);
|
|
322
700
|
}
|
|
323
701
|
if (tools && tools.length > 0) {
|
|
324
|
-
return await
|
|
702
|
+
return await provider.toolCall(system, messages, tools, maxTokens);
|
|
325
703
|
}
|
|
326
|
-
return await
|
|
704
|
+
return await provider.complete(system, messages, maxTokens);
|
|
327
705
|
} catch (error2) {
|
|
328
706
|
if (attempt === RETRY_COUNT) throw error2;
|
|
329
707
|
const delayMs = RETRY_BASE_MS * Math.pow(RETRY_MULTIPLIER, attempt);
|
|
@@ -335,57 +713,10 @@ async function callClaude(options) {
|
|
|
335
713
|
}
|
|
336
714
|
throw new Error("Unreachable");
|
|
337
715
|
}
|
|
338
|
-
async function callClaudeStreaming(anthropic, system, messages, maxTokens, onToken) {
|
|
339
|
-
const stream = anthropic.messages.stream({
|
|
340
|
-
model: MODEL,
|
|
341
|
-
max_tokens: maxTokens,
|
|
342
|
-
system,
|
|
343
|
-
messages
|
|
344
|
-
});
|
|
345
|
-
let fullText = "";
|
|
346
|
-
for await (const event of stream) {
|
|
347
|
-
if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
|
|
348
|
-
fullText += event.delta.text;
|
|
349
|
-
onToken?.(event.delta.text);
|
|
350
|
-
}
|
|
351
|
-
}
|
|
352
|
-
return fullText;
|
|
353
|
-
}
|
|
354
|
-
async function callClaudeToolUse(anthropic, system, messages, tools, maxTokens) {
|
|
355
|
-
const response = await anthropic.messages.create({
|
|
356
|
-
model: MODEL,
|
|
357
|
-
max_tokens: maxTokens,
|
|
358
|
-
system,
|
|
359
|
-
messages,
|
|
360
|
-
tools
|
|
361
|
-
});
|
|
362
|
-
const toolBlock = response.content.find((block) => block.type === "tool_use");
|
|
363
|
-
if (toolBlock && toolBlock.type === "tool_use") {
|
|
364
|
-
return JSON.stringify(toolBlock.input);
|
|
365
|
-
}
|
|
366
|
-
const textBlock = response.content.find((block) => block.type === "text");
|
|
367
|
-
if (textBlock && textBlock.type === "text") {
|
|
368
|
-
return textBlock.text;
|
|
369
|
-
}
|
|
370
|
-
return "";
|
|
371
|
-
}
|
|
372
|
-
async function callClaudeBasic(anthropic, system, messages, maxTokens) {
|
|
373
|
-
const response = await anthropic.messages.create({
|
|
374
|
-
model: MODEL,
|
|
375
|
-
max_tokens: maxTokens,
|
|
376
|
-
system,
|
|
377
|
-
messages
|
|
378
|
-
});
|
|
379
|
-
const textBlock = response.content.find((block) => block.type === "text");
|
|
380
|
-
if (textBlock && textBlock.type === "text") {
|
|
381
|
-
return textBlock.text;
|
|
382
|
-
}
|
|
383
|
-
return "";
|
|
384
|
-
}
|
|
385
716
|
|
|
386
717
|
// src/utils/lock.ts
|
|
387
718
|
import { open, readFile as readFile4, unlink, mkdir as mkdir4 } from "fs/promises";
|
|
388
|
-
import
|
|
719
|
+
import path6 from "path";
|
|
389
720
|
var RECLAIM_SUFFIX = ".reclaim";
|
|
390
721
|
var MAX_ACQUIRE_ATTEMPTS = 2;
|
|
391
722
|
function isProcessAlive(pid) {
|
|
@@ -397,8 +728,8 @@ function isProcessAlive(pid) {
|
|
|
397
728
|
}
|
|
398
729
|
}
|
|
399
730
|
async function acquireLock(root) {
|
|
400
|
-
const lockPath =
|
|
401
|
-
await mkdir4(
|
|
731
|
+
const lockPath = path6.join(root, LOCK_FILE);
|
|
732
|
+
await mkdir4(path6.join(root, LLMWIKI_DIR), { recursive: true });
|
|
402
733
|
for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
|
|
403
734
|
const created = await tryCreateLock(lockPath);
|
|
404
735
|
if (created) return true;
|
|
@@ -470,7 +801,7 @@ async function isLockStale(lockPath) {
|
|
|
470
801
|
}
|
|
471
802
|
}
|
|
472
803
|
async function releaseLock(root) {
|
|
473
|
-
const lockPath =
|
|
804
|
+
const lockPath = path6.join(root, LOCK_FILE);
|
|
474
805
|
try {
|
|
475
806
|
await unlink(lockPath);
|
|
476
807
|
} catch {
|
|
@@ -500,6 +831,11 @@ var CONCEPT_EXTRACTION_TOOL = {
|
|
|
500
831
|
is_new: {
|
|
501
832
|
type: "boolean",
|
|
502
833
|
description: "True if this is a new concept not in existing wiki"
|
|
834
|
+
},
|
|
835
|
+
tags: {
|
|
836
|
+
type: "array",
|
|
837
|
+
items: { type: "string" },
|
|
838
|
+
description: "2-4 categorical tags for organizing this concept (e.g., 'machine-learning', 'optimization')"
|
|
503
839
|
}
|
|
504
840
|
},
|
|
505
841
|
required: ["concept", "summary", "is_new"]
|
|
@@ -526,7 +862,7 @@ ${existingIndex}` : "\n\nNo existing wiki pages yet.";
|
|
|
526
862
|
sourceContent
|
|
527
863
|
].join("\n");
|
|
528
864
|
}
|
|
529
|
-
function buildPagePrompt(
|
|
865
|
+
function buildPagePrompt(concept, sourceContent, existingPage, relatedPages) {
|
|
530
866
|
const existingSection = existingPage ? `
|
|
531
867
|
|
|
532
868
|
Existing page to update:
|
|
@@ -538,11 +874,17 @@ Related wiki pages for cross-referencing:
|
|
|
538
874
|
|
|
539
875
|
${relatedPages}` : "";
|
|
540
876
|
return [
|
|
541
|
-
`You are a wiki author. Write a clear, well-structured markdown page about "${
|
|
877
|
+
`You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
|
|
542
878
|
"Draw facts only from the provided source material.",
|
|
543
879
|
"Include a ## Sources section at the end listing the source document.",
|
|
544
880
|
"Suggest [[wikilinks]] to related concepts where appropriate.",
|
|
545
881
|
"Write in a neutral, informative tone. Be concise but thorough.",
|
|
882
|
+
"",
|
|
883
|
+
"Source attribution: at the end of each prose paragraph, append a citation",
|
|
884
|
+
"marker showing which source file(s) the paragraph drew from.",
|
|
885
|
+
"Format: ^[filename.md] for single-source, ^[source-a.md, source-b.md] for multi-source.",
|
|
886
|
+
"Place citations only at the end of prose paragraphs \u2014 not on headings, list items, or code blocks.",
|
|
887
|
+
"Source filenames are visible as `--- SOURCE: filename.md ---` headers in the content below.",
|
|
546
888
|
existingSection,
|
|
547
889
|
relatedSection,
|
|
548
890
|
"\n\n--- SOURCE MATERIAL ---\n\n",
|
|
@@ -554,8 +896,13 @@ function parseConcepts(toolOutput) {
|
|
|
554
896
|
const parsed = JSON.parse(toolOutput);
|
|
555
897
|
const concepts = parsed.concepts ?? [];
|
|
556
898
|
return concepts.filter(
|
|
557
|
-
(c) => typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean"
|
|
558
|
-
)
|
|
899
|
+
(c) => typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean" && (c.tags === void 0 || Array.isArray(c.tags))
|
|
900
|
+
).map((c) => ({
|
|
901
|
+
concept: c.concept,
|
|
902
|
+
summary: c.summary,
|
|
903
|
+
is_new: c.is_new,
|
|
904
|
+
tags: Array.isArray(c.tags) ? c.tags : void 0
|
|
905
|
+
}));
|
|
559
906
|
} catch {
|
|
560
907
|
return [];
|
|
561
908
|
}
|
|
@@ -564,13 +911,13 @@ function parseConcepts(toolOutput) {
|
|
|
564
911
|
// src/compiler/hasher.ts
|
|
565
912
|
import { createHash } from "crypto";
|
|
566
913
|
import { readFile as readFile5, readdir } from "fs/promises";
|
|
567
|
-
import
|
|
914
|
+
import path7 from "path";
|
|
568
915
|
/**
 * Compute the SHA-256 hex digest of a file's text.
 * The file is read as UTF-8, so the digest covers the decoded text.
 *
 * @param {string} filePath - File to hash.
 * @returns {Promise<string>} Lower-case hex digest.
 */
async function hashFile(filePath) {
  const text = await readFile5(filePath, "utf-8");
  const hasher = createHash("sha256");
  hasher.update(text);
  return hasher.digest("hex");
}
|
|
572
919
|
async function detectChanges(root, prevState) {
|
|
573
|
-
const sourcesPath =
|
|
920
|
+
const sourcesPath = path7.join(root, SOURCES_DIR);
|
|
574
921
|
const currentFiles = await listSourceFiles(sourcesPath);
|
|
575
922
|
const changes = [];
|
|
576
923
|
for (const file of currentFiles) {
|
|
@@ -590,7 +937,7 @@ async function listSourceFiles(sourcesPath) {
|
|
|
590
937
|
}
|
|
591
938
|
}
|
|
592
939
|
async function classifyFile(root, file, prevState) {
|
|
593
|
-
const filePath =
|
|
940
|
+
const filePath = path7.join(root, SOURCES_DIR, file);
|
|
594
941
|
const hash = await hashFile(filePath);
|
|
595
942
|
const prev = prevState.sources[file];
|
|
596
943
|
if (!prev) return "new";
|
|
@@ -617,28 +964,37 @@ function buildConceptToSourcesMap(sources) {
|
|
|
617
964
|
}
|
|
618
965
|
return conceptMap;
|
|
619
966
|
}
|
|
620
|
-
function
|
|
621
|
-
const
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
const deletedFiles = new Set(
|
|
625
|
-
directChanges.filter((c) => c.status === "deleted").map((c) => c.file)
|
|
967
|
+
/**
 * Collect the files of every change whose status is one of `statuses`.
 *
 * @param {Array<{file: string, status: string}>} changes
 * @param {...string} statuses - Statuses to keep.
 * @returns {Set<string>} Matching file names, in first-seen order.
 */
function filesByStatus(changes, ...statuses) {
  const matching = /* @__PURE__ */ new Set();
  for (const { status, file } of changes) {
    if (statuses.includes(status)) {
      matching.add(file);
    }
  }
  return matching;
}
|
|
973
|
+
/**
 * Add to `out` every source that shares a concept with `sourceFile`, skipping
 * sources found in any of the exclusion sets.
 *
 * A concept counts as shared only when the concept map lists at least two
 * contributors for it. Nothing happens when `sourceFile` has no state entry.
 *
 * @param {string} sourceFile - Source whose concepts are inspected.
 * @param {{sources: Record<string, {concepts: string[]}>}} state
 * @param {Map<string, string[]>} conceptMap - Concept slug to contributing sources.
 * @param {Array<Set<string>>} excludeSets - Sources never to add.
 * @param {Set<string>} out - Mutated in place with the contributors found.
 * @returns {void}
 */
function collectSharedContributors(sourceFile, state, conceptMap, excludeSets, out) {
  const entry = state.sources[sourceFile];
  if (!entry) return;

  const isExcluded = (candidate) => excludeSets.some((set) => set.has(candidate));

  for (const slug of entry.concepts) {
    const contributors = conceptMap.get(slug);
    const isShared = Boolean(contributors) && contributors.length >= 2;
    if (!isShared) continue;
    for (const contributor of contributors) {
      if (isExcluded(contributor)) continue;
      out.add(contributor);
    }
  }
}
|
|
985
|
+
/**
 * Find sources that were not changed directly but share a concept with a new
 * or changed source.
 *
 * @param {{sources: Record<string, {concepts: string[]}>}} state - Previous compile state.
 * @param {Array<{file: string, status: string}>} directChanges - Detected changes.
 * @returns {string[]} Affected source files, excluding the new, changed and
 *   deleted files themselves.
 */
function findAffectedSources(state, directChanges) {
  const changedFiles = filesByStatus(directChanges, "new", "changed");
  const deletedFiles = filesByStatus(directChanges, "deleted");
  const conceptMap = buildConceptToSourcesMap(state.sources);

  const affected = /* @__PURE__ */ new Set();
  // `affected` is both the accumulator and one of the exclusion sets.
  const excluded = [changedFiles, deletedFiles, affected];
  changedFiles.forEach((changedFile) => {
    collectSharedContributors(changedFile, state, conceptMap, excluded, affected);
  });
  return [...affected];
}
|
|
@@ -680,36 +1036,36 @@ async function persistFrozenSlugs(root, frozenSlugs, successfulExtractions) {
|
|
|
680
1036
|
const stateToSave = { ...currentState, frozenSlugs: Array.from(remaining) };
|
|
681
1037
|
await writeState(root, stateToSave);
|
|
682
1038
|
}
|
|
683
|
-
function
|
|
684
|
-
const compilingFiles = new Set(
|
|
685
|
-
allChanges.filter((c) => c.status === "new" || c.status === "changed").map((c) => c.file)
|
|
686
|
-
);
|
|
687
|
-
const deletedFiles = new Set(
|
|
688
|
-
allChanges.filter((c) => c.status === "deleted").map((c) => c.file)
|
|
689
|
-
);
|
|
690
|
-
const conceptMap = buildConceptToSourcesMap(state.sources);
|
|
1039
|
+
/**
 * Collect the concept slugs that an extraction produced for a source but that
 * were not recorded for that source in the previous state.
 *
 * @param {Array<{sourceFile: string, concepts: Array<{concept: string}>}>} extractions
 * @param {{sources: Record<string, {concepts: string[]}>}} state
 * @returns {Set<string>} Slugs that are new for the source that produced them.
 */
function collectFreshSlugs(extractions, state) {
  const freshSlugs = /* @__PURE__ */ new Set();
  for (const { sourceFile, concepts } of extractions) {
    // A source with no previous entry has no known slugs, so all of its slugs are fresh.
    const known = new Set(state.sources[sourceFile]?.concepts ?? []);
    concepts
      .map((extracted) => slugify(extracted.concept))
      .filter((slug) => !known.has(slug))
      .forEach((slug) => freshSlugs.add(slug));
  }
  return freshSlugs;
}
|
|
1050
|
+
/**
 * List the sources that own any of the given slugs, minus excluded sources.
 *
 * @param {Iterable<string>} slugs - Concept slugs to look up.
 * @param {Map<string, string[]>} conceptMap - Concept slug to owning sources.
 * @param {Array<Set<string>>} excludeSets - Sources to leave out.
 * @returns {string[]} Unique owners in first-seen order.
 */
function findSlugOwners(slugs, conceptMap, excludeSets) {
  const found = /* @__PURE__ */ new Set();
  const isExcluded = (candidate) => excludeSets.some((set) => set.has(candidate));
  for (const slug of slugs) {
    const owners = conceptMap.get(slug);
    if (!owners) continue;
    owners
      .filter((owner) => !isExcluded(owner))
      .forEach((owner) => found.add(owner));
  }
  return [...found];
}
|
|
1062
|
+
/**
 * Finds sources that own a concept freshly introduced by this run's
 * extractions, skipping sources that are already being compiled (status
 * "new"/"changed") or have been deleted.
 *
 * @param {Array<object>} extractions - Extraction results from the current run.
 * @param {object} state - Compiler state; `state.sources` feeds the concept map.
 * @param {Array<object>} allChanges - Every detected change with its status.
 * @returns {string[]} Source files that own one of the fresh slugs.
 */
function findLateAffectedSources(extractions, state, allChanges) {
  const excluded = [
    filesByStatus(allChanges, "new", "changed"),
    filesByStatus(allChanges, "deleted"),
  ];
  const ownersBySlug = buildConceptToSourcesMap(state.sources);
  return findSlugOwners(collectFreshSlugs(extractions, state), ownersBySlug, excluded);
}
|
|
713
1069
|
function findSharedConcepts(sourceFile, state) {
|
|
714
1070
|
const shared = /* @__PURE__ */ new Set();
|
|
715
1071
|
const sourceEntry = state.sources[sourceFile];
|
|
@@ -739,7 +1095,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
|
|
|
739
1095
|
}
|
|
740
1096
|
|
|
741
1097
|
// src/compiler/orphan.ts
|
|
742
|
-
import
|
|
1098
|
+
import path8 from "path";
|
|
743
1099
|
async function markOrphaned(root, sourceFile, state) {
|
|
744
1100
|
const sourceEntry = state.sources[sourceFile];
|
|
745
1101
|
if (!sourceEntry) return;
|
|
@@ -765,7 +1121,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
|
|
|
765
1121
|
}
|
|
766
1122
|
}
|
|
767
1123
|
async function orphanPage(root, slug, reason) {
|
|
768
|
-
const pagePath =
|
|
1124
|
+
const pagePath = path8.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
769
1125
|
const content = await safeReadFile(pagePath);
|
|
770
1126
|
if (!content) return;
|
|
771
1127
|
const { meta } = parseFrontmatter(content);
|
|
@@ -777,16 +1133,16 @@ async function orphanPage(root, slug, reason) {
|
|
|
777
1133
|
|
|
778
1134
|
// src/compiler/resolver.ts
|
|
779
1135
|
import { readdir as readdir2, readFile as readFile6 } from "fs/promises";
|
|
780
|
-
import
|
|
1136
|
+
import path9 from "path";
|
|
781
1137
|
import { existsSync as existsSync2 } from "fs";
|
|
782
1138
|
async function buildTitleIndex(root) {
|
|
783
|
-
const conceptsDir =
|
|
1139
|
+
const conceptsDir = path9.join(root, CONCEPTS_DIR);
|
|
784
1140
|
if (!existsSync2(conceptsDir)) return [];
|
|
785
1141
|
const files = await readdir2(conceptsDir);
|
|
786
1142
|
const pages = [];
|
|
787
1143
|
for (const file of files) {
|
|
788
1144
|
if (!file.endsWith(".md")) continue;
|
|
789
|
-
const filePath =
|
|
1145
|
+
const filePath = path9.join(conceptsDir, file);
|
|
790
1146
|
const content = await readFile6(filePath, "utf-8");
|
|
791
1147
|
const { meta } = parseFrontmatter(content);
|
|
792
1148
|
if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
|
|
@@ -806,25 +1162,41 @@ function isInsideWikilink(text, position) {
|
|
|
806
1162
|
const closeBefore = text.indexOf("]]", before);
|
|
807
1163
|
return closeBefore >= position;
|
|
808
1164
|
}
|
|
1165
|
+
/**
 * Reports whether `position` falls inside a `^[...]` citation marker.
 *
 * The nearest `^[` at or before `position` is located; the position counts as
 * inside when the first `]` following that opener sits at or after it.
 *
 * @param {string} text - Text being scanned.
 * @param {number} position - Index to test.
 * @returns {boolean} True when the index lies within a citation marker.
 */
function isInsideCitation(text, position) {
  const opener = text.lastIndexOf("^[", position);
  if (opener === -1) return false;
  // Without any closing bracket at or after the position there is nothing to be inside of.
  if (text.indexOf("]", position) === -1) return false;
  return text.indexOf("]", opener) >= position;
}
|
|
809
1172
|
/**
 * Checks that the span [start, end) is delimited on both sides, either by the
 * edge of the text or by a whitespace/punctuation character.
 *
 * @param {string} text - Text containing the span.
 * @param {number} start - Index of the first character of the span.
 * @param {number} end - Index just past the last character of the span.
 * @returns {boolean} True when both neighbours are boundaries.
 */
function isWordBoundary(text, start, end) {
  const delimiter = /[\s,.:;!?()\[\]{}/"']/;
  const leftOpen = start === 0 || delimiter.test(text[start - 1]);
  if (!leftOpen) return false;
  return end >= text.length || delimiter.test(text[end]);
}
|
|
1177
|
+
/**
 * Finds every case-insensitive, literal occurrence of `title` in `text`.
 *
 * @param {string} text - Text to search.
 * @param {string} title - Title to look for; regex metacharacters are matched literally.
 * @returns {Array<{start: number, end: number}>} Match spans in ascending order
 *   (`end` is exclusive). An empty title yields no matches.
 */
function findTitleMatches(text, title) {
  // An empty pattern matches the empty string at index 0 without advancing
  // lastIndex, so an exec() loop over it would never terminate.
  if (!title) return [];
  const escaped = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(escaped, "gi");
  return Array.from(text.matchAll(pattern), (found) => ({
    start: found.index,
    end: found.index + found[0].length,
  }));
}
|
|
1187
|
+
/**
 * Decides whether the span [start, end) may be turned into a wikilink: it must
 * not start inside an existing wikilink or citation, and it must sit on word
 * boundaries.
 *
 * @param {string} text - Text containing the span.
 * @param {number} start - Index of the first character of the span.
 * @param {number} end - Index just past the span.
 * @returns {boolean} Whether the span can be linked.
 */
function isLinkablePosition(text, start, end) {
  const insideMarkup = isInsideWikilink(text, start) || isInsideCitation(text, start);
  return insideMarkup ? false : isWordBoundary(text, start, end);
}
|
|
814
1192
|
function addWikilinks(body, titles, selfTitle) {
|
|
815
1193
|
let result = body;
|
|
1194
|
+
const selfLower = selfTitle.toLowerCase();
|
|
816
1195
|
for (const page of titles) {
|
|
817
|
-
if (page.title.toLowerCase() ===
|
|
818
|
-
const
|
|
819
|
-
const regex = new RegExp(escaped, "gi");
|
|
820
|
-
let match;
|
|
821
|
-
const matches = [];
|
|
822
|
-
while ((match = regex.exec(result)) !== null) {
|
|
823
|
-
matches.push({ start: match.index, end: match.index + match[0].length });
|
|
824
|
-
}
|
|
1196
|
+
if (page.title.toLowerCase() === selfLower) continue;
|
|
1197
|
+
const matches = findTitleMatches(result, page.title);
|
|
825
1198
|
for (const m of matches.reverse()) {
|
|
826
|
-
if (
|
|
827
|
-
if (!isWordBoundary(result, m.start, m.end)) continue;
|
|
1199
|
+
if (!isLinkablePosition(result, m.start, m.end)) continue;
|
|
828
1200
|
result = result.slice(0, m.start) + `[[${page.title}]]` + result.slice(m.end);
|
|
829
1201
|
}
|
|
830
1202
|
}
|
|
@@ -880,41 +1252,43 @@ async function linkPage(page, titleIndex) {
|
|
|
880
1252
|
|
|
881
1253
|
// src/compiler/indexgen.ts
|
|
882
1254
|
import { readdir as readdir3 } from "fs/promises";
|
|
883
|
-
import
|
|
1255
|
+
import path10 from "path";
|
|
884
1256
|
/**
 * Rebuilds the wiki index file from the concept and query pages under `root`.
 *
 * Both page lists are sorted by title before the index content is built and
 * written with `atomicWrite`.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<void>}
 */
async function generateIndex(root) {
  status("*", info("Generating index..."));
  const byTitle = (left, right) => left.title.localeCompare(right.title);
  const concepts = await collectPageSummaries(path10.join(root, CONCEPTS_DIR));
  const queries = await collectPageSummaries(path10.join(root, QUERIES_DIR));
  concepts.sort(byTitle);
  queries.sort(byTitle);
  await atomicWrite(path10.join(root, INDEX_FILE), buildIndexContent(concepts, queries));
  status("+", success(`Index updated with ${concepts.length + queries.length} pages.`));
}
|
|
898
|
-
async function
|
|
1270
|
+
/**
 * Reads every `.md` file in a directory and returns its slug (file name
 * without the extension) together with its parsed frontmatter.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Promise<Array<{slug: string, meta: object}>>} One entry per markdown
 *   file; an empty list when the directory cannot be listed.
 */
async function scanWikiPages(dirPath) {
  let names;
  try {
    names = await readdir3(dirPath);
  } catch {
    // An unreadable or missing directory is treated as "no pages".
    return [];
  }
  const results = [];
  for (const name of names) {
    if (!name.endsWith(".md")) continue;
    // NOTE(review): the read result goes straight into parseFrontmatter with no
    // emptiness check — presumably both helpers tolerate unreadable files; confirm.
    const raw = await safeReadFile(path10.join(dirPath, name));
    const parsed = parseFrontmatter(raw);
    results.push({ slug: name.replace(/\.md$/, ""), meta: parsed.meta });
  }
  return results;
}
|
|
1285
|
+
/**
 * Produces `{ title, slug, summary }` records for the listable pages in a
 * directory: pages with a non-empty string title that are not marked orphaned.
 *
 * @param {string} conceptsPath - Directory whose pages should be summarised.
 * @returns {Promise<Array<{title: string, slug: string, summary: string}>>}
 *   Summaries in scan order; `summary` is "" when the frontmatter has no string summary.
 */
async function collectPageSummaries(conceptsPath) {
  const summaries = [];
  for (const { slug, meta } of await scanWikiPages(conceptsPath)) {
    const listable = meta.title && typeof meta.title === "string" && !meta.orphaned;
    if (!listable) continue;
    summaries.push({
      title: meta.title,
      slug,
      summary: typeof meta.summary === "string" ? meta.summary : "",
    });
  }
  return summaries;
}
|
|
919
1293
|
function stripWikilinks(text) {
|
|
920
1294
|
return text.replace(/\[\[([^\]]+)\]\]/g, "$1");
|
|
@@ -937,84 +1311,378 @@ function buildIndexContent(concepts, queries) {
|
|
|
937
1311
|
return lines.join("\n");
|
|
938
1312
|
}
|
|
939
1313
|
|
|
1314
|
+
// src/compiler/obsidian.ts
|
|
1315
|
+
import { readdir as readdir4 } from "fs/promises";
|
|
1316
|
+
import path11 from "path";
|
|
1317
|
+
var ABBREVIATION_MIN_WORDS = 3;
|
|
1318
|
+
var SWAP_CONJUNCTIONS = [" and ", " or "];
|
|
1319
|
+
/**
 * Adds `tags` and `aliases` fields to a frontmatter object in place.
 *
 * @param {object} frontmatter - Frontmatter fields; mutated.
 * @param {string} conceptTitle - Title the aliases are generated from.
 * @param {Array} tags - Stored as-is under `tags`.
 * @returns {void}
 */
function addObsidianMeta(frontmatter, conceptTitle, tags) {
  const target = Object.assign(frontmatter, { tags });
  target.aliases = generateAliases(conceptTitle);
}
|
|
1323
|
+
/**
 * Builds the alias list for a concept title: its slug (when it differs from
 * the title), a conjunction-swapped variant, and an abbreviation — each only
 * when one is produced.
 *
 * @param {string} title - Concept title.
 * @returns {string[]} Aliases in slug, swap, abbreviation order.
 */
function generateAliases(title) {
  const slug = slugify(title);
  const aliases = slug !== title ? [slug] : [];
  const optional = [generateSwapAlias(title), generateAbbreviation(title)];
  for (const candidate of optional) {
    if (candidate) aliases.push(candidate);
  }
  return aliases;
}
|
|
1339
|
+
/**
 * Swaps the two sides of a title around a conjunction, e.g.
 * "Cats and Dogs" -> "Dogs and Cats".
 *
 * Conjunctions are tried in `SWAP_CONJUNCTIONS` order and matched
 * case-insensitively; the conjunction's original casing is kept.
 *
 * @param {string} title - Concept title.
 * @returns {string|null} The swapped title, or null when no conjunction occurs.
 */
function generateSwapAlias(title) {
  const lowered = title.toLowerCase();
  const hit = SWAP_CONJUNCTIONS
    .map((word) => ({ at: lowered.indexOf(word), size: word.length }))
    .find(({ at }) => at !== -1);
  if (!hit) return null;
  const left = title.slice(0, hit.at);
  const joiner = title.slice(hit.at, hit.at + hit.size);
  const right = title.slice(hit.at + hit.size);
  return `${right}${joiner}${left}`;
}
|
|
1350
|
+
/**
 * Builds an upper-case initialism from a multi-word title, e.g.
 * "Large Language Model" -> "LLM".
 *
 * @param {string} title - Concept title.
 * @returns {string|null} The initialism, or null when the title has fewer than
 *   `ABBREVIATION_MIN_WORDS` words or already equals its own initialism.
 */
function generateAbbreviation(title) {
  // Surrounding whitespace makes split() yield empty strings; drop them so they
  // neither inflate the word count nor crash on the first-character lookup.
  const words = title.trim().split(/\s+/).filter(Boolean);
  if (words.length < ABBREVIATION_MIN_WORDS) return null;
  // Array.from takes the first code point, so astral characters stay intact.
  const abbreviation = words.map((word) => Array.from(word)[0].toUpperCase()).join("");
  if (abbreviation === title) return null;
  return abbreviation;
}
|
|
1357
|
+
/**
 * Writes the Map of Content file: concept pages grouped by tag.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<void>}
 */
async function generateMOC(root) {
  const pages = await loadConceptPages(path11.join(root, CONCEPTS_DIR));
  const markdown = buildMOCContent(groupPagesByTag(pages));
  await atomicWrite(path11.join(root, MOC_FILE), markdown);
}
|
|
1364
|
+
/**
 * Loads the title and tags of every non-orphaned concept page in a directory.
 *
 * @param {string} conceptsPath - Directory holding the concept pages.
 * @returns {Promise<Array<{title: string, tags: Array}>>} One entry per usable
 *   page; empty when the directory cannot be listed. The title falls back to the
 *   file name without `.md`, and tags to an empty list.
 */
async function loadConceptPages(conceptsPath) {
  let names;
  try {
    names = await readdir4(conceptsPath);
  } catch {
    return [];
  }
  const pages = [];
  for (const name of names.filter((candidate) => candidate.endsWith(".md"))) {
    const raw = await safeReadFile(path11.join(conceptsPath, name));
    if (!raw) continue;
    const { meta } = parseFrontmatter(raw);
    if (meta.orphaned) continue;
    pages.push({
      title: typeof meta.title === "string" ? meta.title : name.replace(/\.md$/, ""),
      tags: Array.isArray(meta.tags) ? meta.tags : [],
    });
  }
  return pages;
}
|
|
1384
|
+
/**
 * Groups page titles by tag. A page appears once under each of its tags;
 * pages without tags are filed under "Uncategorized".
 *
 * @param {Array<{title: string, tags: Array}>} pages - Pages to group.
 * @returns {Map<*, string[]>} Tag -> titles, in first-seen order.
 */
function groupPagesByTag(pages) {
  const groups = new Map();
  for (const { tags, title } of pages) {
    const keys = tags.length === 0 ? ["Uncategorized"] : tags;
    for (const key of keys) {
      appendToGroup(groups, key, title);
    }
  }
  return groups;
}
|
|
1397
|
+
/**
 * Appends a title to the list stored under `key`, creating the list on first use.
 *
 * @param {Map<*, string[]>} groups - Map being filled; mutated.
 * @param {*} key - Group key.
 * @param {string} title - Title to append.
 * @returns {void}
 */
function appendToGroup(groups, key, title) {
  const bucket = groups.get(key);
  if (!bucket) {
    groups.set(key, [title]);
    return;
  }
  bucket.push(title);
}
|
|
1405
|
+
/**
 * Renders the Map of Content markdown: one `##` section per tag, each listing
 * its pages as wikilinks.
 *
 * Tags are ordered with `localeCompare`, with "Uncategorized" always last.
 * Titles inside a section use the same locale-aware ordering, so capitalised
 * titles are not all sorted ahead of lower-case ones. The title arrays held by
 * `tagGroups` are copied before sorting and are never mutated.
 *
 * @param {Map<*, string[]>} tagGroups - Tag -> page titles.
 * @returns {string} Markdown document.
 */
function buildMOCContent(tagGroups) {
  const lines = ["# Map of Content", ""];
  const sortedTags = [...tagGroups.keys()].sort((a, b) => {
    if (a === "Uncategorized") return 1;
    if (b === "Uncategorized") return -1;
    // Frontmatter tags may be numbers or booleans; compare their string form.
    return String(a).localeCompare(String(b));
  });
  for (const tag of sortedTags) {
    const titles = [...(tagGroups.get(tag) ?? [])].sort((a, b) => a.localeCompare(b));
    lines.push(`## ${tag}`, "");
    for (const title of titles) {
      lines.push(`- [[${title}]]`);
    }
    lines.push("");
  }
  return lines.join("\n");
}
|
|
1422
|
+
|
|
1423
|
+
// src/utils/embeddings.ts
|
|
1424
|
+
import { readFile as readFile7, readdir as readdir5 } from "fs/promises";
|
|
1425
|
+
import { existsSync as existsSync3 } from "fs";
|
|
1426
|
+
import path12 from "path";
|
|
1427
|
+
/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} The similarity, or 0 when the vectors are empty, differ in
 *   length, or either has zero magnitude.
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size !== b.length || size === 0) return 0;
  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  let i = 0;
  while (i < size) {
    const x = a[i];
    const y = b[i];
    dot += x * y;
    squaredA += x * x;
    squaredB += y * y;
    i += 1;
  }
  if (squaredA === 0 || squaredB === 0) return 0;
  return dot / (Math.sqrt(squaredA) * Math.sqrt(squaredB));
}
|
|
1440
|
+
/**
 * Returns the `k` store entries whose vectors are most similar to the query.
 *
 * @param {ArrayLike<number>} queryVec - Query embedding.
 * @param {{entries: Array<object>}} store - Store whose entries carry a `vector`.
 * @param {number} k - Maximum number of entries to return.
 * @returns {Array<object>} Entries ordered from most to least similar.
 */
function findTopK(queryVec, store, k) {
  const ranked = [];
  for (const entry of store.entries) {
    ranked.push({ entry, score: cosineSimilarity(queryVec, entry.vector) });
  }
  ranked.sort((a, b) => b.score - a.score);
  return ranked.slice(0, k).map(({ entry }) => entry);
}
|
|
1448
|
+
/**
 * Loads the embedding store from its JSON file under `root`.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object|null>} The parsed store, or null when the file does not exist.
 * @throws {SyntaxError} When the file exists but is not valid JSON.
 */
async function readEmbeddingStore(root) {
  const storePath = path12.join(root, EMBEDDINGS_FILE);
  if (existsSync3(storePath)) {
    return JSON.parse(await readFile7(storePath, "utf-8"));
  }
  return null;
}
|
|
1454
|
+
/**
 * Serialises the embedding store as 2-space-indented JSON and writes it with
 * `atomicWrite`.
 *
 * @param {string} root - Project root directory.
 * @param {object} store - Store to persist.
 * @returns {Promise<void>}
 */
async function writeEmbeddingStore(root, store) {
  const target = path12.join(root, EMBEDDINGS_FILE);
  const serialized = JSON.stringify(store, null, 2);
  await atomicWrite(target, serialized);
}
|
|
1458
|
+
/**
 * Finds the stored pages most similar to a question by embedding the question
 * with the active provider and ranking the stored vectors against it.
 *
 * @param {string} root - Project root directory.
 * @param {string} question - Text to embed and compare.
 * @returns {Promise<Array<{slug: string, title: string, summary: string}>>} Up to
 *   `EMBEDDING_TOP_K` pages; empty when there is no store or it has no entries.
 */
async function findRelevantPages(root, question) {
  const store = await readEmbeddingStore(root);
  if (!store) return [];
  if (store.entries.length === 0) return [];
  const provider = getProvider();
  const queryVec = await provider.embed(question);
  const nearest = findTopK(queryVec, store, EMBEDDING_TOP_K);
  // Strip the vectors; callers only need the page descriptors.
  return nearest.map(({ slug, title, summary }) => ({ slug, title, summary }));
}
|
|
1468
|
+
/**
 * Gathers `{ slug, title, summary }` records for every non-orphaned page with a
 * string title in the concepts and queries directories.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<Array<{slug: string, title: string, summary: string}>>}
 *   Records in directory order; directories that cannot be listed are skipped.
 */
async function collectPageRecords(root) {
  const records = [];
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
    const absDir = path12.join(root, dir);
    let names;
    try {
      names = await readdir5(absDir);
    } catch {
      continue;
    }
    for (const name of names) {
      if (!name.endsWith(".md")) continue;
      const raw = await safeReadFile(path12.join(absDir, name));
      const { meta } = parseFrontmatter(raw);
      const usable = !meta.orphaned && typeof meta.title === "string";
      if (!usable) continue;
      records.push({
        slug: name.replace(/\.md$/, ""),
        title: meta.title,
        summary: typeof meta.summary === "string" ? meta.summary : "",
      });
    }
  }
  return records;
}
|
|
1491
|
+
/**
 * Builds the text that is embedded for a page: the title, followed by a blank
 * line and the summary when a summary is present.
 *
 * @param {{title: string, summary?: string}} record - Page record.
 * @returns {string} Text to send to the embedding provider.
 */
function buildEmbeddingText(record) {
  if (!record.summary) {
    return record.title;
  }
  return `${record.title}\n\n${record.summary}`;
}
|
|
1496
|
+
/**
 * Embeds the selected records one at a time with the active provider.
 *
 * @param {Array<{slug: string, title: string, summary: string}>} records - Candidate pages.
 * @param {Set<string>} slugsToEmbed - Only records whose slug is in this set are embedded.
 * @returns {Promise<Array<object>>} Entries carrying slug, title, summary, vector and
 *   a shared `updatedAt` ISO timestamp taken once at the start of the call.
 */
async function embedPages(records, slugsToEmbed) {
  const provider = getProvider();
  const stamp = (/* @__PURE__ */ new Date()).toISOString();
  const selected = records.filter((record) => slugsToEmbed.has(record.slug));
  const fresh = [];
  for (const record of selected) {
    const { slug, title, summary } = record;
    const vector = await provider.embed(buildEmbeddingText(record));
    fresh.push({ slug, title, summary, vector, updatedAt: stamp });
  }
  return fresh;
}
|
|
1513
|
+
/**
 * Looks up the embedding model for the active provider, falling back to the
 * Anthropic entry when the provider has none in `EMBEDDING_MODELS`.
 *
 * @returns {string} Embedding model name.
 */
function resolveEmbeddingModel() {
  const configured = EMBEDDING_MODELS[getActiveProviderName()];
  return configured ?? EMBEDDING_MODELS.anthropic;
}
|
|
1516
|
+
/**
 * Merges previously stored entries with freshly embedded ones.
 *
 * Stored entries survive only while their slug is still live; a fresh entry
 * replaces a stored entry with the same slug.
 *
 * @param {Array<{slug: string}>} existing - Entries from the current store.
 * @param {Array<{slug: string}>} fresh - Entries embedded during this run.
 * @param {Set<string>} liveSlugs - Slugs of pages that still exist.
 * @returns {Array<object>} One entry per slug, in first-seen order.
 */
function mergeEntries(existing, fresh, liveSlugs) {
  const survivors = existing.filter((entry) => liveSlugs.has(entry.slug));
  const bySlug = new Map([...survivors, ...fresh].map((entry) => [entry.slug, entry]));
  return [...bySlug.values()];
}
|
|
1526
|
+
/**
 * Brings the embedding store in line with the pages on disk.
 *
 * Changed pages are re-embedded, entries for pages that no longer exist are
 * dropped, and everything is embedded when there is no store yet. A store
 * written with a different embedding model is rebuilt from scratch: vectors
 * from different models are not comparable, and `cosineSimilarity` returns 0
 * for vectors of different lengths.
 *
 * @param {string} root - Project root directory.
 * @param {string[]} changedSlugs - Slugs of pages written during this compile.
 * @returns {Promise<void>} Resolves without writing when nothing needs to change.
 */
async function updateEmbeddings(root, changedSlugs) {
  const records = await collectPageRecords(root);
  const liveSlugs = new Set(records.map((r) => r.slug));
  const toEmbed = new Set(changedSlugs.filter((slug) => liveSlugs.has(slug)));
  const model = resolveEmbeddingModel();
  const existingStore = await readEmbeddingStore(root);
  const storedEntries = existingStore?.entries ?? [];
  const modelChanged = Boolean(existingStore) && existingStore.model !== model;
  // Entries embedded with another model cannot be reused.
  const previousEntries = modelChanged ? [] : storedEntries;
  if (!existingStore || modelChanged) {
    for (const record of records) toEmbed.add(record.slug);
  }
  const nothingToPrune = storedEntries.every((e) => liveSlugs.has(e.slug));
  if (toEmbed.size === 0 && nothingToPrune && !modelChanged) {
    return;
  }
  const freshEntries = await embedPages(records, toEmbed);
  const mergedEntries = mergeEntries(previousEntries, freshEntries, liveSlugs);
  const dimensions = mergedEntries[0]?.vector.length ?? 0;
  const store = {
    version: 1,
    model,
    dimensions,
    entries: mergedEntries
  };
  await writeEmbeddingStore(root, store);
  status("*", dim(`Embeddings updated (${mergedEntries.length} pages).`));
}
|
|
1550
|
+
|
|
940
1551
|
// src/compiler/index.ts
|
|
941
1552
|
import pLimit from "p-limit";
|
|
1553
|
+
/**
 * Creates a compile result with zeroed counters and fresh, empty lists.
 *
 * @returns {{compiled: number, skipped: number, deleted: number, concepts: Array, pages: Array, errors: Array}}
 *   A new object on every call.
 */
function emptyCompileResult() {
  const result = { compiled: 0, skipped: 0, deleted: 0 };
  for (const listKey of ["concepts", "pages", "errors"]) {
    result[listKey] = [];
  }
  return result;
}
|
|
942
1556
|
/**
 * Runs a compile for `root` and discards the structured result.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<void>}
 */
async function compile(root) {
  // The summary object is only of interest to callers of compileAndReport.
  void (await compileAndReport(root));
}
|
|
1559
|
+
/**
 * Runs the compile pipeline under the project lock and returns its result.
 *
 * When the lock cannot be acquired, nothing is compiled and an empty result
 * carrying a single error message is returned. The lock is released even when
 * the pipeline throws.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object>} Compile result (counters, concepts, pages, errors).
 */
async function compileAndReport(root) {
  header("llmwiki compile");
  if (!(await acquireLock(root))) {
    status("!", error("Could not acquire lock. Try again later."));
    const blocked = emptyCompileResult();
    blocked.errors = ["Could not acquire .llmwiki/lock \u2014 another compile is in progress."];
    return blocked;
  }
  let outcome;
  try {
    outcome = await runCompilePipeline(root);
  } finally {
    await releaseLock(root);
  }
  return outcome;
}
|
|
1575
|
+
/**
 * Splits detected changes into the buckets the pipeline works with.
 *
 * @param {Array<{file: string, status: string}>} changes - Detected changes.
 * @returns {{toCompile: Array, deleted: Array, unchanged: Array}} `toCompile`
 *   holds "new" and "changed" entries; any other status is left out.
 */
function bucketChanges(changes) {
  const buckets = { toCompile: [], deleted: [], unchanged: [] };
  for (const change of changes) {
    switch (change.status) {
      case "new":
      case "changed":
        buckets.toCompile.push(change);
        break;
      case "deleted":
        buckets.deleted.push(change);
        break;
      case "unchanged":
        buckets.unchanged.push(change);
        break;
      default:
        break;
    }
  }
  return buckets;
}
|
|
1582
|
+
/**
 * Generates one wiki page per merged concept, running at most
 * `COMPILE_CONCURRENCY` generations at a time.
 *
 * @param {string} root - Project root directory.
 * @param {Array<object>} extractions - Extraction results to merge by slug.
 * @param {Set<string>} frozenSlugs - Slugs passed to `mergeExtractions`.
 * @returns {Promise<{pages: Array<object>, errors: string[]}>} The merged entries
 *   plus any error messages returned by page generation.
 */
async function generatePagesPhase(root, extractions, frozenSlugs) {
  const merged = mergeExtractions(extractions, frozenSlugs);
  const throttle = pLimit(COMPILE_CONCURRENCY);
  const errors = [];
  const renderOne = async (entry) => {
    const failure = await generateMergedPage(root, entry);
    if (failure) errors.push(failure);
    return entry;
  };
  const tasks = merged.map((entry) => throttle(() => renderOne(entry)));
  const pages = await Promise.all(tasks);
  return { pages, errors };
}
|
|
1595
|
+
/**
 * Persists source state for every extraction that yielded at least one
 * concept, one source at a time.
 *
 * @param {string} root - Project root directory.
 * @param {Array<object>} extractions - Extraction results.
 * @returns {Promise<void>}
 */
async function persistExtractionStates(root, extractions) {
  for (const { concepts, sourcePath, sourceFile } of extractions) {
    if (concepts.length !== 0) {
      await persistSourceState(root, sourcePath, sourceFile, concepts);
    }
  }
}
|
|
1601
|
+
/**
 * Prints the end-of-run summary and assembles the structured compile result.
 *
 * @param {{toCompile: Array, unchanged: Array, deleted: Array}} buckets - Bucketed changes.
 * @param {{pages: Array<object>, errors: string[]}} generation - Page generation outcome.
 * @param {Array<object>} extractions - Extraction results; each one without
 *   concepts adds a "No concepts extracted" error.
 * @returns {object} Counters, concept names, page slugs and collected errors.
 */
function summarizeCompile(buckets, generation, extractions) {
  header("Compilation complete");
  const compiled = buckets.toCompile.length;
  const skipped = buckets.unchanged.length;
  const deleted = buckets.deleted.length;
  status("\u2713", success(
    `${compiled} compiled, ${skipped} skipped, ${deleted} deleted`
  ));
  if (compiled > 0) {
    status("\u2192", dim('Next: llmwiki query "your question here"'));
  }
  const emptySources = extractions
    .filter((result) => result.concepts.length === 0)
    .map((result) => `No concepts extracted from ${result.sourceFile}`);
  return {
    compiled,
    skipped,
    deleted,
    concepts: generation.pages.map((entry) => entry.concept.concept),
    pages: generation.pages.map((entry) => entry.slug),
    errors: [...generation.errors, ...emptySources]
  };
}
|
|
955
1624
|
/**
 * Executes one full compile: change detection, orphaning of deleted sources,
 * concept extraction, page generation, state persistence and wiki
 * finalisation.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object>} Compile result; when nothing is new, changed or
 *   deleted, an empty result whose `skipped` counts the unchanged sources.
 */
async function runCompilePipeline(root) {
  // Phase 1: work out what changed, including sources touched via shared concepts.
  const state = await readState(root);
  const changes = await detectChanges(root, state);
  augmentWithAffectedSources(changes, findAffectedSources(state, changes));
  const buckets = bucketChanges(changes);
  const { toCompile, deleted, unchanged } = buckets;
  const hasWork = toCompile.length !== 0 || deleted.length !== 0;
  if (!hasWork) {
    status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
    return Object.assign(emptyCompileResult(), { skipped: unchanged.length });
  }
  printChangesSummary(changes);

  // Phase 2: retire deleted sources and report the slugs frozen because of them.
  await markDeletedAsOrphaned(root, deleted, state);
  const frozenSlugs = findFrozenSlugs(state, changes);
  reportFrozenSlugs(frozenSlugs);

  // Phase 3: extract concepts, then generate pages.
  const extractions = await runExtractionPhases(root, toCompile, state, changes);
  await freezeFailedExtractions(root, extractions, frozenSlugs);
  const generation = await generatePagesPhase(root, extractions, frozenSlugs);

  // Phase 4: persist state, then refresh derived artefacts.
  await persistExtractionStates(root, extractions);
  if (frozenSlugs.size > 0) {
    await orphanUnownedFrozenPages(root, frozenSlugs);
  }
  await persistFrozenSlugs(root, frozenSlugs, extractions);
  await finalizeWiki(root, generation.pages);
  return summarizeCompile(buckets, generation, extractions);
}
|
|
1648
|
+
/**
 * Appends a "changed" entry to `changes` for every affected source file and
 * logs each one.
 *
 * @param {Array<{file: string, status: string}>} changes - Change list; mutated.
 * @param {Iterable<string>} affected - Source files affected through a shared concept.
 * @returns {void}
 */
function augmentWithAffectedSources(changes, affected) {
  for (const file of affected) {
    const notice = `${file} [affected by shared concept]`;
    status("~", info(notice));
    changes.push({ file, status: "changed" });
  }
}
|
|
1654
|
+
/**
 * Calls `markOrphaned` for each deleted source, one after another.
 *
 * @param {string} root - Project root directory.
 * @param {Array<{file: string}>} deleted - Changes with status "deleted".
 * @param {object} state - Compiler state passed through to `markOrphaned`.
 * @returns {Promise<void>}
 */
async function markDeletedAsOrphaned(root, deleted, state) {
  for (const { file } of deleted) {
    await markOrphaned(root, file, state);
  }
}
|
|
1659
|
+
/**
 * Logs one informational line per frozen slug.
 *
 * @param {Iterable<string>} frozenSlugs - Slugs to report.
 * @returns {void}
 */
function reportFrozenSlugs(frozenSlugs) {
  for (const slug of frozenSlugs) {
    const note = `Frozen: ${slug} (shared with deleted source)`;
    status("i", dim(note));
  }
}
|
|
1664
|
+
/**
 * Extracts concepts for every source queued for compilation, then for the
 * sources found to share a concept freshly introduced by those extractions.
 *
 * @param {string} root - Project root directory.
 * @param {Array<{file: string}>} toCompile - Changes with status "new" or "changed".
 * @param {object} state - Compiler state.
 * @param {Array<object>} allChanges - Every detected change.
 * @returns {Promise<Array<object>>} Extraction results: queued sources first,
 *   late-affected sources after.
 */
async function runExtractionPhases(root, toCompile, state, allChanges) {
  const results = [];
  const extractInto = async (file) => {
    results.push(await extractForSource(root, file));
  };
  for (const { file } of toCompile) {
    await extractInto(file);
  }
  // Late-affected sources are only known once the first pass has produced its concepts.
  for (const file of findLateAffectedSources(results, state, allChanges)) {
    status("~", info(`${file} [shares concept with new source]`));
    await extractInto(file);
  }
  return results;
}
|
|
1676
|
+
/**
 * Refreshes everything derived from the generated pages: interlinks (only when
 * pages were generated), the index, the Map of Content and the embeddings.
 *
 * @param {string} root - Project root directory.
 * @param {Array<object>} pages - Entries generated this run; each has a `slug`
 *   and a `concept` with an `is_new` flag.
 * @returns {Promise<void>}
 */
async function finalizeWiki(root, pages) {
  const changedSlugs = [];
  const newSlugs = [];
  for (const page of pages) {
    changedSlugs.push(page.slug);
    if (page.concept.is_new) newSlugs.push(page.slug);
  }
  if (changedSlugs.length > 0) {
    status("\u{1F517}", info("Resolving interlinks..."));
    await resolveLinks(root, changedSlugs, newSlugs);
  }
  await generateIndex(root);
  await generateMOC(root);
  await safelyUpdateEmbeddings(root, changedSlugs);
}
|
|
1019
1687
|
function printChangesSummary(changes) {
|
|
1020
1688
|
const iconMap = {
|
|
@@ -1036,20 +1704,23 @@ function printChangesSummary(changes) {
|
|
|
1036
1704
|
}
|
|
1037
1705
|
}
|
|
1038
1706
|
/**
 * Reads one source file and extracts its concepts, passing the current index
 * file's content along to the extractor.
 *
 * @param {string} root - Project root directory.
 * @param {string} sourceFile - File name inside the sources directory.
 * @returns {Promise<{sourceFile: string, sourcePath: string, sourceContent: string, concepts: Array<object>}>}
 *   The source's identity, content and extracted concepts.
 */
async function extractForSource(root, sourceFile) {
  status("*", info(`Extracting: ${sourceFile}`));
  const sourcePath = path13.join(root, SOURCES_DIR, sourceFile);
  const sourceContent = await readFile8(sourcePath, "utf-8");
  const indexPath = path13.join(root, INDEX_FILE);
  const concepts = await extractConcepts(sourceContent, await safeReadFile(indexPath));
  const count = concepts.length;
  if (count > 0) {
    const names = concepts.map((item) => item.concept).join(", ");
    status("*", dim(` Found ${concepts.length} concepts: ${names}`));
  }
  return { sourceFile, sourcePath, sourceContent, concepts };
}
|
|
1047
1718
|
function mergeExtractions(extractions, frozenSlugs) {
|
|
1048
1719
|
const bySlug = /* @__PURE__ */ new Map();
|
|
1049
1720
|
for (const result of extractions) {
|
|
1050
1721
|
if (result.concepts.length === 0) continue;
|
|
1051
|
-
for (const
|
|
1052
|
-
const slug = slugify(
|
|
1722
|
+
for (const concept of result.concepts) {
|
|
1723
|
+
const slug = slugify(concept.concept);
|
|
1053
1724
|
if (frozenSlugs.has(slug)) continue;
|
|
1054
1725
|
const existing = bySlug.get(slug);
|
|
1055
1726
|
if (existing) {
|
|
@@ -1062,7 +1733,7 @@ ${result.sourceContent}`;
|
|
|
1062
1733
|
} else {
|
|
1063
1734
|
bySlug.set(slug, {
|
|
1064
1735
|
slug,
|
|
1065
|
-
concept
|
|
1736
|
+
concept,
|
|
1066
1737
|
sourceFiles: [result.sourceFile],
|
|
1067
1738
|
combinedContent: `--- SOURCE: ${result.sourceFile} ---
|
|
1068
1739
|
|
|
@@ -1074,10 +1745,9 @@ ${result.sourceContent}`
|
|
|
1074
1745
|
return Array.from(bySlug.values());
|
|
1075
1746
|
}
|
|
1076
1747
|
async function generateMergedPage(root, entry) {
|
|
1077
|
-
const pagePath =
|
|
1748
|
+
const pagePath = path13.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
1078
1749
|
const existingPage = await safeReadFile(pagePath);
|
|
1079
1750
|
const relatedPages = await loadRelatedPages(root, entry.slug);
|
|
1080
|
-
status(">", info(`Generating: ${entry.concept.concept}`));
|
|
1081
1751
|
const system = buildPagePrompt(
|
|
1082
1752
|
entry.concept.concept,
|
|
1083
1753
|
entry.combinedContent,
|
|
@@ -1088,29 +1758,27 @@ async function generateMergedPage(root, entry) {
|
|
|
1088
1758
|
system,
|
|
1089
1759
|
messages: [
|
|
1090
1760
|
{ role: "user", content: `Write the wiki page for "${entry.concept.concept}".` }
|
|
1091
|
-
]
|
|
1092
|
-
stream: true,
|
|
1093
|
-
onToken: (token) => process.stdout.write(dim(token))
|
|
1761
|
+
]
|
|
1094
1762
|
});
|
|
1095
|
-
process.stdout.write("\n");
|
|
1096
1763
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1097
1764
|
const existing = existingPage ? parseFrontmatter(existingPage) : null;
|
|
1098
1765
|
const createdAt = existing?.meta.createdAt && typeof existing.meta.createdAt === "string" ? existing.meta.createdAt : now;
|
|
1099
|
-
const
|
|
1766
|
+
const frontmatterFields = {
|
|
1100
1767
|
title: entry.concept.concept,
|
|
1101
1768
|
summary: entry.concept.summary,
|
|
1102
1769
|
sources: entry.sourceFiles,
|
|
1103
1770
|
createdAt,
|
|
1104
1771
|
updatedAt: now
|
|
1105
|
-
}
|
|
1772
|
+
};
|
|
1773
|
+
addObsidianMeta(frontmatterFields, entry.concept.concept, entry.concept.tags ?? []);
|
|
1774
|
+
const frontmatter = buildFrontmatter(frontmatterFields);
|
|
1106
1775
|
const fullPage = `${frontmatter}
|
|
1107
1776
|
|
|
1108
1777
|
${pageBody}
|
|
1109
1778
|
`;
|
|
1110
|
-
await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
1779
|
+
return await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
1111
1780
|
}
|
|
1112
1781
|
async function extractConcepts(sourceContent, existingIndex) {
|
|
1113
|
-
status("*", info("Extracting concepts..."));
|
|
1114
1782
|
const system = buildExtractionPrompt(sourceContent, existingIndex);
|
|
1115
1783
|
const rawOutput = await callClaude({
|
|
1116
1784
|
system,
|
|
@@ -1119,24 +1787,18 @@ async function extractConcepts(sourceContent, existingIndex) {
|
|
|
1119
1787
|
});
|
|
1120
1788
|
return parseConcepts(rawOutput);
|
|
1121
1789
|
}
|
|
1122
|
-
function logExtractedConcepts(concepts) {
|
|
1123
|
-
for (const c of concepts) {
|
|
1124
|
-
const tag = c.is_new ? success("NEW") : dim("update");
|
|
1125
|
-
status("*", `${concept(c.concept)} [${tag}] \u2014 ${c.summary}`);
|
|
1126
|
-
}
|
|
1127
|
-
}
|
|
1128
1790
|
async function loadRelatedPages(root, excludeSlug) {
|
|
1129
|
-
const conceptsPath =
|
|
1791
|
+
const conceptsPath = path13.join(root, CONCEPTS_DIR);
|
|
1130
1792
|
let files;
|
|
1131
1793
|
try {
|
|
1132
|
-
files = await
|
|
1794
|
+
files = await readdir6(conceptsPath);
|
|
1133
1795
|
} catch {
|
|
1134
1796
|
return "";
|
|
1135
1797
|
}
|
|
1136
1798
|
const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, 5);
|
|
1137
1799
|
const contents = [];
|
|
1138
1800
|
for (const f of related) {
|
|
1139
|
-
const content = await safeReadFile(
|
|
1801
|
+
const content = await safeReadFile(path13.join(conceptsPath, f));
|
|
1140
1802
|
if (!content) continue;
|
|
1141
1803
|
const { meta } = parseFrontmatter(content);
|
|
1142
1804
|
if (meta.orphaned) continue;
|
|
@@ -1147,10 +1809,18 @@ async function loadRelatedPages(root, excludeSlug) {
|
|
|
1147
1809
|
async function writePageIfValid(pagePath, content, conceptTitle) {
|
|
1148
1810
|
if (!validateWikiPage(content)) {
|
|
1149
1811
|
status("!", warn(`Invalid page for "${conceptTitle}" \u2014 skipped.`));
|
|
1150
|
-
return
|
|
1812
|
+
return `Invalid page for "${conceptTitle}" \u2014 failed validation`;
|
|
1151
1813
|
}
|
|
1152
1814
|
await atomicWrite(pagePath, content);
|
|
1153
|
-
|
|
1815
|
+
return null;
|
|
1816
|
+
}
|
|
1817
|
+
/**
 * Refresh embeddings for the given slugs without letting a failure propagate.
 * Any error raised by `updateEmbeddings` is reported as a warning status line.
 *
 * @param {string} root - Workspace root passed through to `updateEmbeddings`.
 * @param {string[]} changedSlugs - Slugs whose embeddings should be refreshed.
 * @returns {Promise<void>}
 */
async function safelyUpdateEmbeddings(root, changedSlugs) {
  try {
    await updateEmbeddings(root, changedSlugs);
  } catch (failure) {
    let reason;
    if (failure instanceof Error) {
      reason = failure.message;
    } else {
      reason = String(failure);
    }
    status("!", warn(`Skipped embeddings update: ${reason}`));
  }
}
|
|
1155
1825
|
async function persistSourceState(root, sourcePath, sourceFile, concepts) {
|
|
1156
1826
|
const hash = await hashFile(sourcePath);
|
|
@@ -1164,7 +1834,7 @@ async function persistSourceState(root, sourcePath, sourceFile, concepts) {
|
|
|
1164
1834
|
|
|
1165
1835
|
// src/commands/compile.ts
|
|
1166
1836
|
async function compileCommand() {
|
|
1167
|
-
if (!
|
|
1837
|
+
if (!existsSync4(SOURCES_DIR)) {
|
|
1168
1838
|
status(
|
|
1169
1839
|
"!",
|
|
1170
1840
|
warn("No sources found. Run `llmwiki ingest <url>` first.")
|
|
@@ -1175,8 +1845,8 @@ async function compileCommand() {
|
|
|
1175
1845
|
}
|
|
1176
1846
|
|
|
1177
1847
|
// src/commands/query.ts
|
|
1178
|
-
import { existsSync as
|
|
1179
|
-
import
|
|
1848
|
+
import { existsSync as existsSync5 } from "fs";
|
|
1849
|
+
import path14 from "path";
|
|
1180
1850
|
var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
|
|
1181
1851
|
var PAGE_SELECTION_TOOL = {
|
|
1182
1852
|
name: "select_pages",
|
|
@@ -1221,12 +1891,35 @@ ${indexContent}`;
|
|
|
1221
1891
|
return { pages: [], reasoning: "Failed to parse page selection response" };
|
|
1222
1892
|
}
|
|
1223
1893
|
}
|
|
1894
|
+
/**
 * Render candidate pages as a markdown bullet list, one page per line.
 *
 * @param {Array<{slug: string, title: string, summary: string}>} candidates
 * @returns {string} Newline-joined bullets; empty string for no candidates.
 */
function buildFilteredIndex(candidates) {
  const bullets = [];
  for (const { slug, title, summary } of candidates) {
    bullets.push(`- **${slug}**: ${title} \u2014 ${summary}`);
  }
  return bullets.join("\n");
}
|
|
1897
|
+
/**
 * Choose the pages to answer a question from.
 *
 * When the pre-filter yields candidates, the selection runs over a reduced
 * index built from them and the returned names are used as-is. Otherwise the
 * selection runs over the full index file and the names are slugified.
 *
 * @param {string} root - Workspace root.
 * @param {string} question - The user's question.
 * @returns {Promise<{pages: string[], rawPages: string[], reasoning: string}>}
 */
async function selectRelevantPages(root, question) {
  const shortlist = await tryFindRelevantPages(root, question);
  const hasShortlist = shortlist.length > 0;
  const indexText = hasShortlist
    ? buildFilteredIndex(shortlist)
    : await safeReadFile(path14.join(root, INDEX_FILE));
  const selection = await selectPages(question, indexText);
  const chosen = selection.pages;
  const pages = hasShortlist ? chosen : chosen.map((name) => slugify(name));
  return { pages, rawPages: chosen, reasoning: selection.reasoning };
}
|
|
1908
|
+
/**
 * Run the relevance pre-filter, degrading to "no candidates" on failure.
 * A failure is reported as a dimmed status line rather than thrown.
 *
 * @param {string} root - Workspace root.
 * @param {string} question - The user's question.
 * @returns {Promise<Array>} Candidates from `findRelevantPages`, or `[]` on error.
 */
async function tryFindRelevantPages(root, question) {
  let matches;
  try {
    matches = await findRelevantPages(root, question);
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    status("!", dim(`Semantic pre-filter unavailable (${reason}); using full index.`));
    matches = [];
  }
  return matches;
}
|
|
1224
1917
|
async function loadSelectedPages(root, slugs) {
|
|
1225
1918
|
const sections = [];
|
|
1226
1919
|
for (const slug of slugs) {
|
|
1227
1920
|
let content = "";
|
|
1228
1921
|
for (const dir of PAGE_DIRS) {
|
|
1229
|
-
const candidate = await safeReadFile(
|
|
1922
|
+
const candidate = await safeReadFile(path14.join(root, dir, `${slug}.md`));
|
|
1230
1923
|
if (!candidate) continue;
|
|
1231
1924
|
const { meta } = parseFrontmatter(candidate);
|
|
1232
1925
|
if (meta.orphaned) continue;
|
|
@@ -1242,20 +1935,18 @@ ${content}`);
|
|
|
1242
1935
|
}
|
|
1243
1936
|
return sections.join("\n\n");
|
|
1244
1937
|
}
|
|
1245
|
-
|
|
1246
|
-
|
|
1938
|
+
var ANSWER_SYSTEM_PROMPT = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
|
|
1939
|
+
async function callAnswerLLM(question, pagesContent, onToken) {
|
|
1247
1940
|
const userMessage = `Question: ${question}
|
|
1248
1941
|
|
|
1249
1942
|
Relevant wiki pages:
|
|
1250
1943
|
${pagesContent}`;
|
|
1251
|
-
|
|
1252
|
-
system:
|
|
1944
|
+
return callClaude({
|
|
1945
|
+
system: ANSWER_SYSTEM_PROMPT,
|
|
1253
1946
|
messages: [{ role: "user", content: userMessage }],
|
|
1254
|
-
stream:
|
|
1255
|
-
onToken
|
|
1947
|
+
stream: Boolean(onToken),
|
|
1948
|
+
onToken
|
|
1256
1949
|
});
|
|
1257
|
-
process.stdout.write("\n");
|
|
1258
|
-
return answer;
|
|
1259
1950
|
}
|
|
1260
1951
|
function summarizeAnswer(answer) {
|
|
1261
1952
|
const firstLine = answer.trim().split(/\n/)[0] ?? "";
|
|
@@ -1264,7 +1955,7 @@ function summarizeAnswer(answer) {
|
|
|
1264
1955
|
}
|
|
1265
1956
|
async function saveQueryPage(root, question, answer) {
|
|
1266
1957
|
const slug = slugify(question);
|
|
1267
|
-
const filePath =
|
|
1958
|
+
const filePath = path14.join(root, QUERIES_DIR, `${slug}.md`);
|
|
1268
1959
|
const frontmatter = buildFrontmatter({
|
|
1269
1960
|
title: question,
|
|
1270
1961
|
summary: summarizeAnswer(answer),
|
|
@@ -1281,27 +1972,52 @@ ${answer}
|
|
|
1281
1972
|
success(`Saved query \u2192 ${source(filePath)}`)
|
|
1282
1973
|
);
|
|
1283
1974
|
await generateIndex(root);
|
|
1975
|
+
try {
|
|
1976
|
+
await updateEmbeddings(root, [slug]);
|
|
1977
|
+
} catch (err) {
|
|
1978
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1979
|
+
status("!", warn(`Skipped embeddings update: ${message}`));
|
|
1980
|
+
}
|
|
1981
|
+
return slug;
|
|
1982
|
+
}
|
|
1983
|
+
/**
 * Answer a question from the wiki: select pages, load them, call the answer
 * model, and optionally save the answer as a query page.
 *
 * @param {string} root - Workspace root.
 * @param {string} question - The user's question.
 * @param {object} [options]
 * @param {boolean} [options.save] - When truthy, save the answer via `saveQueryPage`.
 * @param {Function} [options.onToken] - Passed through to `callAnswerLLM`.
 * @param {Function} [options.onPageSelection] - Called with (pages, reasoning) after selection.
 * @returns {Promise<{answer: string, selectedPages: string[], reasoning: string, saved?: string}>}
 *   `answer` is "" (and `saved` is absent) when no page content could be loaded.
 * @throws {Error} When the wiki index file does not exist.
 */
async function generateAnswer(root, question, options = {}) {
  const indexPath = path14.join(root, INDEX_FILE);
  if (!existsSync5(indexPath)) {
    throw new Error("Wiki index not found. Run `llmwiki compile` first.");
  }
  const selection = await selectRelevantPages(root, question);
  const selectedPages = selection.pages;
  const reasoning = selection.reasoning;
  options.onPageSelection?.(selectedPages, reasoning);
  const pagesContent = await loadSelectedPages(root, selectedPages);
  if (!pagesContent) {
    return { answer: "", selectedPages, reasoning };
  }
  const answer = await callAnswerLLM(question, pagesContent, options.onToken);
  const saved = options.save ? await saveQueryPage(root, question, answer) : void 0;
  return { answer, selectedPages, reasoning, saved };
}
|
|
1285
2000
|
async function queryCommand(root, question, options) {
|
|
1286
|
-
if (!
|
|
2001
|
+
if (!existsSync5(path14.join(root, INDEX_FILE))) {
|
|
1287
2002
|
status("!", error("Wiki index not found. Run `llmwiki compile` first."));
|
|
1288
2003
|
return;
|
|
1289
2004
|
}
|
|
1290
2005
|
header("Selecting relevant pages");
|
|
1291
|
-
const
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
2006
|
+
const result = await generateAnswer(root, question, {
|
|
2007
|
+
save: options.save,
|
|
2008
|
+
onToken: (text) => process.stdout.write(text),
|
|
2009
|
+
onPageSelection: (pages, reasoning) => {
|
|
2010
|
+
status("i", dim(`Reasoning: ${reasoning}`));
|
|
2011
|
+
status("*", info(`Selected ${pages.length} page(s): ${pages.join(", ")}`));
|
|
2012
|
+
header("Generating answer");
|
|
2013
|
+
}
|
|
2014
|
+
});
|
|
2015
|
+
process.stdout.write("\n");
|
|
2016
|
+
if (!result.answer) {
|
|
1299
2017
|
status("!", error("No matching pages found. Try refining your question."));
|
|
1300
2018
|
return;
|
|
1301
2019
|
}
|
|
1302
|
-
|
|
1303
|
-
if (options.save) {
|
|
1304
|
-
await saveQueryPage(root, question, answer);
|
|
2020
|
+
if (result.saved) {
|
|
1305
2021
|
status("\u2192", dim("Saved. Future queries will use this answer as context."));
|
|
1306
2022
|
} else {
|
|
1307
2023
|
status("\u2192", dim("Tip: use --save to add this answer to your wiki"));
|
|
@@ -1310,12 +2026,12 @@ async function queryCommand(root, question, options) {
|
|
|
1310
2026
|
|
|
1311
2027
|
// src/commands/watch.ts
|
|
1312
2028
|
import { watch as chokidarWatch } from "chokidar";
|
|
1313
|
-
import { existsSync as
|
|
1314
|
-
import
|
|
2029
|
+
import { existsSync as existsSync6 } from "fs";
|
|
2030
|
+
import path15 from "path";
|
|
1315
2031
|
var DEBOUNCE_MS = 500;
|
|
1316
2032
|
async function watchCommand() {
|
|
1317
|
-
const sourcesPath =
|
|
1318
|
-
if (!
|
|
2033
|
+
const sourcesPath = path15.resolve(SOURCES_DIR);
|
|
2034
|
+
if (!existsSync6(sourcesPath)) {
|
|
1319
2035
|
status(
|
|
1320
2036
|
"!",
|
|
1321
2037
|
warn("No sources/ directory found. Run `llmwiki ingest <url>` first.")
|
|
@@ -1349,7 +2065,7 @@ async function watchCommand() {
|
|
|
1349
2065
|
const scheduleCompile = (eventPath, event) => {
|
|
1350
2066
|
status(
|
|
1351
2067
|
"~",
|
|
1352
|
-
dim(`${event}: ${
|
|
2068
|
+
dim(`${event}: ${path15.basename(eventPath)}`)
|
|
1353
2069
|
);
|
|
1354
2070
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
1355
2071
|
debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
|
|
@@ -1363,6 +2079,609 @@ async function watchCommand() {
|
|
|
1363
2079
|
});
|
|
1364
2080
|
}
|
|
1365
2081
|
|
|
2082
|
+
// src/linter/rules.ts
|
|
2083
|
+
import { readdir as readdir7, readFile as readFile9 } from "fs/promises";
|
|
2084
|
+
import { existsSync as existsSync7 } from "fs";
|
|
2085
|
+
import path16 from "path";
|
|
2086
|
+
var MIN_BODY_LENGTH = 50;
|
|
2087
|
+
var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
|
|
2088
|
+
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
2089
|
+
/**
 * Collect the first capture group of every pattern match, tagged with the
 * 1-based line number on which it occurs.
 *
 * @param {string} content - Text to scan; split on "\n".
 * @param {RegExp} pattern - Must carry the `g` flag (`matchAll` throws otherwise).
 * @returns {Array<{captured: string, line: number}>} Matches in document order.
 */
function findMatchesInContent(content, pattern) {
  return content.split("\n").flatMap((text, index) =>
    Array.from(text.matchAll(pattern), (hit) => ({ captured: hit[1], line: index + 1 }))
  );
}
|
|
2100
|
+
/**
 * Read every `.md` entry directly inside a directory.
 *
 * @param {string} dirPath - Directory to scan (not recursive).
 * @returns {Promise<Array<{filePath: string, content: string}>>}
 *   One record per markdown file, in directory-listing order; `[]` when the
 *   directory does not exist.
 */
async function readMarkdownFiles(dirPath) {
  if (!existsSync7(dirPath)) {
    return [];
  }
  const names = await readdir7(dirPath);
  const pending = [];
  for (const name of names) {
    if (!name.endsWith(".md")) continue;
    const filePath = path16.join(dirPath, name);
    // Reads are started together and awaited as a group.
    pending.push(readFile9(filePath, "utf-8").then((content) => ({ filePath, content })));
  }
  return Promise.all(pending);
}
|
|
2113
|
+
/**
 * Load all wiki pages: concept pages first, then saved query pages.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<Array<{filePath: string, content: string}>>}
 */
async function collectAllPages(root) {
  let pages = [];
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
    pages = pages.concat(await readMarkdownFiles(path16.join(root, dir)));
  }
  return pages;
}
|
|
2118
|
+
/**
 * Build the set of known page slugs: each file's base name without `.md`,
 * lower-cased.
 *
 * @param {Array<{filePath: string}>} pages
 * @returns {Set<string>}
 */
function buildPageSlugSet(pages) {
  return new Set(
    pages.map(({ filePath }) => path16.basename(filePath, ".md").toLowerCase())
  );
}
|
|
2126
|
+
/**
 * Lint rule: report every `[[wikilink]]` whose slugified target does not
 * correspond to an existing page file.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<Array<object>>} One "broken-wikilink" error per offending link.
 */
async function checkBrokenWikilinks(root) {
  const allPages = await collectAllPages(root);
  const knownSlugs = buildPageSlugSet(allPages);
  const diagnostics = [];
  for (const { filePath, content } of allPages) {
    const links = findMatchesInContent(content, WIKILINK_PATTERN);
    for (const link of links) {
      if (knownSlugs.has(slugify(link.captured))) continue;
      diagnostics.push({
        rule: "broken-wikilink",
        severity: "error",
        file: filePath,
        message: `Broken wikilink [[${link.captured}]] \u2014 no matching page found`,
        line: link.line
      });
    }
  }
  return diagnostics;
}
|
|
2146
|
+
/**
 * Lint rule: report pages whose frontmatter has `orphaned` set to exactly `true`.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<Array<object>>} One "orphaned-page" warning per flagged page.
 */
async function checkOrphanedPages(root) {
  const allPages = await collectAllPages(root);
  return allPages
    .filter((page) => parseFrontmatter(page.content).meta.orphaned === true)
    .map((page) => ({
      rule: "orphaned-page",
      severity: "warning",
      file: page.filePath,
      message: `Page is marked as orphaned`
    }));
}
|
|
2162
|
+
/**
 * Lint rule: report pages whose frontmatter `summary` is falsy or a
 * whitespace-only string.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<Array<object>>} One "missing-summary" warning per page.
 */
async function checkMissingSummaries(root) {
  const diagnostics = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const { summary } = parseFrontmatter(content).meta;
    const isBlankString = typeof summary === "string" && summary.trim() === "";
    if (summary && !isBlankString) continue;
    diagnostics.push({
      rule: "missing-summary",
      severity: "warning",
      file: filePath,
      message: `Page has no summary in frontmatter`
    });
  }
  return diagnostics;
}
|
|
2180
|
+
/**
 * Lint rule: report pages that share a title (compared lower-cased and
 * trimmed). Every file in a duplicate group gets its own diagnostic that
 * lists the other files in the group.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<Array<object>>} "duplicate-concept" errors.
 */
async function checkDuplicateConcepts(root) {
  const filesByTitle = new Map();
  for (const { filePath, content } of await collectAllPages(root)) {
    const rawTitle = parseFrontmatter(content).meta.title;
    // Pages without a non-empty string title take no part in the comparison.
    if (typeof rawTitle !== "string" || rawTitle === "") continue;
    const key = rawTitle.toLowerCase().trim();
    if (filesByTitle.has(key)) {
      filesByTitle.get(key).push(filePath);
    } else {
      filesByTitle.set(key, [filePath]);
    }
  }
  const diagnostics = [];
  filesByTitle.forEach((files, title) => {
    if (files.length <= 1) return;
    for (const file of files) {
      const others = files.filter((f) => f !== file).join(", ");
      diagnostics.push({
        rule: "duplicate-concept",
        severity: "error",
        file,
        message: `Duplicate title "${title}" \u2014 also in ${others}`
      });
    }
  });
  return diagnostics;
}
|
|
2206
|
+
/**
 * Lint rule: report titled pages whose trimmed body is shorter than
 * MIN_BODY_LENGTH characters. Pages without a non-blank string title are
 * not reported by this rule.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<Array<object>>} One "empty-page" warning per short page.
 */
async function checkEmptyPages(root) {
  const diagnostics = [];
  for (const { filePath, content } of await collectAllPages(root)) {
    const parsed = parseFrontmatter(content);
    const title = parsed.meta.title;
    if (typeof title !== "string" || title.trim() === "") continue;
    if (parsed.body.trim().length >= MIN_BODY_LENGTH) continue;
    diagnostics.push({
      rule: "empty-page",
      severity: "warning",
      file: filePath,
      message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
    });
  }
  return diagnostics;
}
|
|
2224
|
+
/**
 * Lint rule: report every `^[citation]` whose captured text, resolved
 * against the sources directory, does not exist on disk.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<Array<object>>} One "broken-citation" error per missing source.
 */
async function checkBrokenCitations(root) {
  const allPages = await collectAllPages(root);
  const sourcesRoot = path16.join(root, SOURCES_DIR);
  const diagnostics = [];
  for (const { filePath, content } of allPages) {
    const citations = findMatchesInContent(content, CITATION_PATTERN);
    for (const citation of citations) {
      const target = path16.join(sourcesRoot, citation.captured);
      if (existsSync7(target)) continue;
      diagnostics.push({
        rule: "broken-citation",
        severity: "error",
        file: filePath,
        message: `Broken citation ^[${citation.captured}] \u2014 source file not found`,
        line: citation.line
      });
    }
  }
  return diagnostics;
}
|
|
2244
|
+
|
|
2245
|
+
// src/linter/index.ts
|
|
2246
|
+
var ALL_RULES = [
|
|
2247
|
+
checkBrokenWikilinks,
|
|
2248
|
+
checkOrphanedPages,
|
|
2249
|
+
checkMissingSummaries,
|
|
2250
|
+
checkDuplicateConcepts,
|
|
2251
|
+
checkEmptyPages,
|
|
2252
|
+
checkBrokenCitations
|
|
2253
|
+
];
|
|
2254
|
+
/**
 * Count the diagnostics that carry the given severity.
 *
 * @param {Array<{severity: string}>} results
 * @param {string} severity - Severity label to match exactly.
 * @returns {number}
 */
function countBySeverity(results, severity) {
  return results.reduce(
    (total, entry) => (entry.severity === severity ? total + 1 : total),
    0
  );
}
|
|
2257
|
+
/**
 * Run every rule in ALL_RULES against the workspace and summarise the outcome.
 * The rules are started together and their results concatenated in rule order.
 *
 * @param {string} root - Workspace root handed to each rule.
 * @returns {Promise<{errors: number, warnings: number, info: number, results: Array<object>}>}
 */
async function lint(root) {
  const pendingRules = ALL_RULES.map((rule) => rule(root));
  const results = (await Promise.all(pendingRules)).flat();
  const [errors, warnings, infos] = ["error", "warning", "info"].map(
    (level) => countBySeverity(results, level)
  );
  return { errors, warnings, info: infos, results };
}
|
|
2269
|
+
|
|
2270
|
+
// src/commands/lint.ts
|
|
2271
|
+
var SEVERITY_FORMATTERS = {
|
|
2272
|
+
error,
|
|
2273
|
+
warning: warn,
|
|
2274
|
+
info
|
|
2275
|
+
};
|
|
2276
|
+
var SEVERITY_ICONS = {
|
|
2277
|
+
error: "x",
|
|
2278
|
+
warning: "!",
|
|
2279
|
+
info: "i"
|
|
2280
|
+
};
|
|
2281
|
+
/**
 * Print one lint diagnostic as a status line: severity label, location
 * (`file:line` when a line is present, otherwise just the file), then the message.
 *
 * @param {{severity: string, file: string, line?: number, message: string}} result
 */
function printResult(result) {
  const { severity, file, line, message } = result;
  const colorize = SEVERITY_FORMATTERS[severity];
  let location = file;
  if (line) {
    location = `${file}:${line}`;
  }
  status(SEVERITY_ICONS[severity], `${colorize(severity)} ${dim(location)} ${message}`);
}
|
|
2287
|
+
/**
 * CLI entry point for linting: lints the current working directory, prints
 * each diagnostic followed by a totals line, and exits with status 1 when any
 * error-severity diagnostic was reported.
 *
 * @returns {Promise<void>}
 */
async function lintCommand() {
  header("Linting wiki");
  const { results, errors, warnings, info: infoCount } = await lint(process.cwd());
  results.forEach((entry) => {
    printResult(entry);
  });
  console.log();
  const errorPart = error(`${errors} error(s)`);
  const warningPart = warn(`${warnings} warning(s)`);
  const infoPart = info(`${infoCount} info`);
  status("*", [errorPart, warningPart, infoPart].join(", "));
  if (errors > 0) {
    process.exit(1);
  }
}
|
|
2304
|
+
|
|
2305
|
+
// src/mcp/server.ts
|
|
2306
|
+
import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2307
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
2308
|
+
|
|
2309
|
+
// src/mcp/tools.ts
|
|
2310
|
+
import path17 from "path";
|
|
2311
|
+
import { z } from "zod";
|
|
2312
|
+
|
|
2313
|
+
// src/mcp/provider-check.ts
|
|
2314
|
+
// Environment variable that holds the API key for each supported provider.
// A `null` entry means the provider is accepted without a key check.
var PROVIDER_KEY_VARS = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  ollama: null,
  minimax: "MINIMAX_API_KEY"
};
/**
 * Verify that the configured LLM provider is known and has credentials.
 *
 * The provider comes from LLMWIKI_PROVIDER, falling back to DEFAULT_PROVIDER.
 * "anthropic" accepts either an API key or an auth token; the other providers
 * are checked against the variable named in PROVIDER_KEY_VARS.
 *
 * @returns {void}
 * @throws {Error} When the provider is unknown or its credentials are missing.
 */
function ensureProviderAvailable() {
  const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
  if (provider === "anthropic") {
    const auth = resolveAnthropicAuthFromEnv();
    if (!auth.apiKey && !auth.authToken) {
      throw new Error(
        'Anthropic credentials are required for the "anthropic" provider. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN.'
      );
    }
    return;
  }
  // Own-property check: a plain lookup would also find inherited members such
  // as "constructor" or "toString" and treat them as configured providers.
  if (!Object.prototype.hasOwnProperty.call(PROVIDER_KEY_VARS, provider)) {
    throw new Error(
      `Unknown provider "${provider}". Supported: ${Object.keys(PROVIDER_KEY_VARS).join(", ")}`
    );
  }
  const keyVar = PROVIDER_KEY_VARS[provider];
  if (keyVar && !process.env[keyVar]) {
    throw new Error(
      `${keyVar} environment variable is required for the "${provider}" provider.`
    );
  }
}
|
|
2343
|
+
|
|
2344
|
+
// src/mcp/tools.ts
|
|
2345
|
+
var PAGE_DIRS2 = [CONCEPTS_DIR, QUERIES_DIR];
|
|
2346
|
+
/**
 * Wrap a payload as a tool result: pretty-printed JSON as a text part, plus
 * the raw payload under `structuredContent.result`.
 *
 * @param {*} payload - JSON-serialisable value.
 * @returns {{content: Array<{type: string, text: string}>, structuredContent: {result: *}}}
 */
function jsonResult(payload) {
  const text = JSON.stringify(payload, null, 2);
  const textPart = { type: "text", text };
  return {
    content: [textPart],
    structuredContent: { result: payload }
  };
}
|
|
2352
|
+
/**
 * Register every wiki tool on the server, in a fixed order.
 *
 * @param {object} server - Server instance handed to each registrar.
 * @param {string} root - Workspace root the tools operate on.
 */
function registerWikiTools(server, root) {
  const registrars = [
    registerIngestTool,
    registerCompileTool,
    registerQueryTool,
    registerSearchTool,
    registerReadTool,
    registerLintTool,
    registerStatusTool
  ];
  for (const register of registrars) {
    register(server, root);
  }
}
|
|
2361
|
+
/**
 * Register the `ingest_source` tool, which runs `ingestSource` with the
 * process working directory temporarily switched to the workspace root.
 *
 * @param {object} server - Server exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerIngestTool(server, root) {
  const definition = {
    title: "Ingest Source",
    description: "Fetch a URL or copy a local file into sources/. Returns the saved filename, character count, and whether content was truncated to fit the size limit.",
    inputSchema: {
      source: z.string().describe("URL (http/https) or absolute path to a .md/.txt file")
    }
  };
  const runIngest = async ({ source: sourceArg }) => {
    const originalCwd = process.cwd();
    // NOTE(review): process.chdir affects the whole process, so overlapping
    // tool calls could observe each other's directory. Confirm that calls are
    // serialized.
    try {
      process.chdir(root);
      const outcome = await ingestSource(sourceArg);
      return jsonResult(outcome);
    } finally {
      // Always restore the caller's directory, even when ingestion fails.
      process.chdir(originalCwd);
    }
  };
  server.registerTool("ingest_source", definition, runIngest);
}
|
|
2383
|
+
/**
 * Register the `compile_wiki` tool. The handler checks provider credentials
 * first and then returns the result of `compileAndReport` for the workspace.
 *
 * @param {object} server - Server exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerCompileTool(server, root) {
  const definition = {
    title: "Compile Wiki",
    description: "Run the incremental compile pipeline: extract concepts from new/changed sources, generate wiki pages, resolve interlinks, and rebuild the index. Requires an LLM provider with credentials.",
    inputSchema: {}
  };
  const runCompile = async () => {
    ensureProviderAvailable();
    return jsonResult(await compileAndReport(root));
  };
  server.registerTool("compile_wiki", definition, runCompile);
}
|
|
2398
|
+
/**
 * Register the `query_wiki` tool. The handler checks provider credentials and
 * returns the result of `generateAnswer`, forwarding the optional `save` flag.
 *
 * @param {object} server - Server exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerQueryTool(server, root) {
  const definition = {
    title: "Query Wiki",
    description: "Ask a natural-language question. Selects relevant pages with the LLM, loads them, and returns a grounded answer with citations. Set save=true to persist the answer as a wiki page. Requires an LLM provider.",
    inputSchema: {
      question: z.string().describe("The natural-language question to answer."),
      save: z.boolean().optional().describe("Persist the answer as a wiki/queries/ page when true.")
    }
  };
  const runQuery = async ({ question, save }) => {
    ensureProviderAvailable();
    const outcome = await generateAnswer(root, question, { save });
    return jsonResult(outcome);
  };
  server.registerTool("query_wiki", definition, runQuery);
}
|
|
2416
|
+
/**
 * Register the `search_pages` tool. The handler checks provider credentials,
 * picks the slugs relevant to the question, and returns the loaded page records.
 *
 * @param {object} server - Server exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerSearchTool(server, root) {
  const definition = {
    title: "Search Pages",
    description: "Select pages relevant to a question and return their full content. Uses semantic embeddings when available, falling back to LLM-based selection over the wiki index. Requires an LLM provider.",
    inputSchema: {
      question: z.string().describe("The query used to rank pages.")
    }
  };
  const runSearch = async ({ question }) => {
    ensureProviderAvailable();
    const slugs = await pickSearchSlugs(root, question);
    const pages = await loadPageRecords(root, slugs);
    return jsonResult({ pages });
  };
  server.registerTool("search_pages", definition, runSearch);
}
|
|
2434
|
+
/**
 * Pick the page slugs relevant to a question.
 *
 * Tries `findRelevantPages` first. If it fails or returns nothing, falls back
 * to `selectPages` over the full index file. Fallback names are slugified,
 * matching what `selectRelevantPages` does on its index path, because the
 * callers look pages up by `<slug>.md` file name.
 *
 * @param {string} root - Workspace root.
 * @param {string} question - Query used to rank pages.
 * @returns {Promise<string[]>} Page slugs.
 */
async function pickSearchSlugs(root, question) {
  try {
    const candidates = await findRelevantPages(root, question);
    if (candidates.length > 0) return candidates.map((c) => c.slug);
  } catch {
    // Best-effort: a pre-filter failure is not fatal, so fall through to the
    // index-based selection below.
  }
  const indexContent = await safeReadFile(path17.join(root, INDEX_FILE));
  const { pages } = await selectPages(question, indexContent);
  return pages.map((page) => slugify(page));
}
|
|
2444
|
+
/**
 * Register the `read_page` tool. The handler returns the page record for a
 * slug and rejects with "Page not found" when `readPage` yields nothing.
 *
 * @param {object} server - Server exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerReadTool(server, root) {
  const definition = {
    title: "Read Page",
    description: "Read a single wiki page by slug. Searches concepts/ first, then queries/. Returns the parsed frontmatter and body. No LLM call required.",
    inputSchema: {
      slug: z.string().describe("Page slug, without .md extension.")
    }
  };
  const runRead = async ({ slug }) => {
    const record = await readPage(root, slug);
    if (record) {
      return jsonResult(record);
    }
    throw new Error(`Page not found: ${slug}`);
  };
  server.registerTool("read_page", definition, runRead);
}
|
|
2463
|
+
/**
 * Register the `lint_wiki` tool, which returns the lint summary for the workspace.
 *
 * @param {object} server - Server exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerLintTool(server, root) {
  const definition = {
    title: "Lint Wiki",
    description: "Run rule-based quality checks (broken wikilinks, orphans, duplicates, empty pages, broken citations). Returns structured diagnostics. No LLM call.",
    inputSchema: {}
  };
  const runLint = async () => jsonResult(await lint(root));
  server.registerTool("lint_wiki", definition, runLint);
}
|
|
2477
|
+
/**
 * Register the `wiki_status` tool, which returns the status summary built by
 * `collectStatus`.
 *
 * @param {object} server - Server exposing `registerTool`.
 * @param {string} root - Workspace root.
 */
function registerStatusTool(server, root) {
  const definition = {
    title: "Wiki Status",
    description: "Summarize the wiki: page count, source count, last compile time, orphaned pages, and pending source changes. Read-only \u2014 never modifies the workspace.",
    inputSchema: {}
  };
  const runStatus = async () => {
    const snapshot = await collectStatus(root);
    return jsonResult(snapshot);
  };
  server.registerTool("wiki_status", definition, runStatus);
}
|
|
2488
|
+
/**
 * Build the status snapshot: page counts, tracked source count, the greatest
 * recorded compile time, orphaned concept slugs, and sources whose change
 * status is anything other than "unchanged".
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<object>} Status summary.
 */
async function collectStatus(root) {
  const conceptPages = await collectPageSummaries(path17.join(root, CONCEPTS_DIR));
  const queryPages = await collectPageSummaries(path17.join(root, QUERIES_DIR));
  const state = await readState(root);
  const detected = await detectChanges(root, state);
  const orphanedPages = await findOrphanedSlugs(root);

  // The default sort leaves the greatest timestamp string in the final slot.
  const stamps = Object.values(state.sources).map((entry) => entry.compiledAt);
  let lastCompiledAt = null;
  if (stamps.length > 0) {
    stamps.sort();
    lastCompiledAt = stamps[stamps.length - 1];
  }

  const pendingChanges = [];
  for (const change of detected) {
    if (change.status === "unchanged") continue;
    pendingChanges.push({ file: change.file, status: change.status });
  }

  const conceptCount = conceptPages.length;
  const queryCount = queryPages.length;
  return {
    pages: { concepts: conceptCount, queries: queryCount, total: conceptCount + queryCount },
    sources: Object.keys(state.sources).length,
    lastCompiledAt,
    orphanedPages,
    pendingChanges
  };
}
|
|
2504
|
+
/**
 * List the slugs of concept pages whose frontmatter marks them as orphaned.
 *
 * @param {string} root - Workspace root.
 * @returns {Promise<string[]>}
 */
async function findOrphanedSlugs(root) {
  const conceptPages = await scanWikiPages(path17.join(root, CONCEPTS_DIR));
  const orphanSlugs = [];
  for (const page of conceptPages) {
    if (page.meta.orphaned) {
      orphanSlugs.push(page.slug);
    }
  }
  return orphanSlugs;
}
|
|
2508
|
+
/**
 * Load page records for a list of slugs, one at a time and in order,
 * skipping slugs for which `readPage` yields nothing.
 *
 * @param {string} root - Workspace root.
 * @param {string[]} slugs
 * @returns {Promise<Array<object>>}
 */
async function loadPageRecords(root, slugs) {
  const found = [];
  for (const slug of slugs) {
    const record = await readPage(root, slug);
    if (!record) continue;
    found.push(record);
  }
  return found;
}
|
|
2516
|
+
/**
 * Read one wiki page by slug, trying each directory in PAGE_DIRS2 in order.
 * Pages whose frontmatter marks them as orphaned are skipped.
 *
 * @param {string} root - Workspace root.
 * @param {string} slug - Page slug without the `.md` extension.
 * @returns {Promise<{slug: string, title: string, summary: string, body: string} | null>}
 *   The page record, or `null` when no usable page exists or the slug is not
 *   a bare file name.
 */
async function readPage(root, slug) {
  // The slug comes from tool input or model output. Accept only a bare file
  // name so that "../" segments or absolute paths cannot escape the page dirs.
  if (typeof slug !== "string" || slug === "" || path17.basename(slug) !== slug) {
    return null;
  }
  for (const dir of PAGE_DIRS2) {
    const content = await safeReadFile(path17.join(root, dir, `${slug}.md`));
    if (!content) continue;
    const { meta, body } = parseFrontmatter(content);
    if (meta.orphaned) continue;
    return {
      slug,
      // Fall back to the slug / empty string when frontmatter lacks string values.
      title: typeof meta.title === "string" ? meta.title : slug,
      summary: typeof meta.summary === "string" ? meta.summary : "",
      body: body.trim()
    };
  }
  return null;
}
|
|
2531
|
+
|
|
2532
|
+
// src/mcp/resources.ts
|
|
2533
|
+
import path18 from "path";
|
|
2534
|
+
import { readdir as readdir8 } from "fs/promises";
|
|
2535
|
+
import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2536
|
+
/**
 * Build a JSON resource content entry for a URI.
 *
 * @param {{href: string}} uri - Resource URI; only `href` is read.
 * @param {*} payload - JSON-serialisable value, pretty-printed with 2 spaces.
 * @returns {{uri: string, mimeType: string, text: string}}
 */
function jsonContent(uri, payload) {
  const { href } = uri;
  const text = JSON.stringify(payload, null, 2);
  return { uri: href, mimeType: "application/json", text };
}
|
|
2543
|
+
/**
 * Build a markdown resource content entry for a URI.
 *
 * @param {{href: string}} uri - Resource URI; only `href` is read.
 * @param {string} text - Markdown body, passed through unchanged.
 * @returns {{uri: string, mimeType: string, text: string}}
 */
function markdownContent(uri, text) {
  const entry = { uri: uri.href, mimeType: "text/markdown" };
  entry.text = text;
  return entry;
}
|
|
2550
|
+
/**
 * Register every wiki resource on the server, in a fixed order.
 *
 * @param {object} server - Server instance handed to each registrar.
 * @param {string} root - Workspace root the resources read from.
 */
function registerWikiResources(server, root) {
  const registrars = [
    registerIndexResource,
    registerSourcesResource,
    registerStateResource,
    registerConceptResource,
    registerQueryResource
  ];
  for (const register of registrars) {
    register(server, root);
  }
}
|
|
2557
|
+
/**
 * Register the `llmwiki://index` resource, which serves the index file's
 * text as markdown.
 *
 * @param {object} server - Server exposing `registerResource`.
 * @param {string} root - Workspace root.
 */
function registerIndexResource(server, root) {
  const metadata = {
    title: "Wiki Index",
    description: "Full content of wiki/index.md (auto-generated table of contents).",
    mimeType: "text/markdown"
  };
  const readIndex = async (uri) => {
    const indexText = await safeReadFile(path18.join(root, INDEX_FILE));
    return { contents: [markdownContent(uri, indexText)] };
  };
  server.registerResource("wiki-index", "llmwiki://index", metadata, readIndex);
}
|
|
2572
|
+
/**
 * Register the `llmwiki://sources` resource, which serves the source listing
 * from `listSources` as JSON.
 *
 * @param {object} server - Server exposing `registerResource`.
 * @param {string} root - Workspace root.
 */
function registerSourcesResource(server, root) {
  const metadata = {
    title: "Wiki Sources",
    description: "List of ingested source files with frontmatter metadata.",
    mimeType: "application/json"
  };
  const readSources = async (uri) => {
    const listing = await listSources(root);
    return { contents: [jsonContent(uri, listing)] };
  };
  server.registerResource("wiki-sources", "llmwiki://sources", metadata, readSources);
}
|
|
2586
|
+
/**
 * Register the `llmwiki://state` resource, which serves the value returned
 * by `readState` as JSON.
 *
 * @param {object} server - Server exposing `registerResource`.
 * @param {string} root - Workspace root.
 */
function registerStateResource(server, root) {
  const metadata = {
    title: "Compilation State",
    description: "Per-source hashes, concepts, and last compile times from .llmwiki/state.json.",
    mimeType: "application/json"
  };
  const readStateResource = async (uri) => ({
    contents: [jsonContent(uri, await readState(root))]
  });
  server.registerResource("wiki-state", "llmwiki://state", metadata, readStateResource);
}
|
|
2601
|
+
/**
 * Register the templated `llmwiki://concept/{slug}` resource. Listing
 * delegates to `listPagesUnder`; reading returns the concept page (meta plus
 * body) as JSON.
 *
 * @param {object} server - Server exposing `registerResource`.
 * @param {string} root - Workspace root.
 */
function registerConceptResource(server, root) {
  const template = new ResourceTemplate("llmwiki://concept/{slug}", {
    list: async () => listPagesUnder(root, CONCEPTS_DIR, "concept")
  });
  const metadata = {
    title: "Wiki Concept",
    description: "A single concept page from wiki/concepts/ \u2014 frontmatter plus body.",
    mimeType: "application/json"
  };
  const readConcept = async (uri, { slug }) => {
    const page = await loadPageWithMeta(root, CONCEPTS_DIR, String(slug));
    return { contents: [jsonContent(uri, page)] };
  };
  server.registerResource("wiki-concept", template, metadata, readConcept);
}
|
|
2617
|
+
/**
 * Registers the templated "llmwiki://query/{slug}" resource.
 * Listing enumerates the pages under QUERIES_DIR; reading loads one saved
 * query page via loadPageWithMeta() and returns it as JSON content.
 *
 * @param server MCP server instance exposing registerResource().
 * @param root   Project root directory.
 */
function registerQueryResource(server, root) {
  const template = new ResourceTemplate("llmwiki://query/{slug}", {
    list: async () => listPagesUnder(root, QUERIES_DIR, "query")
  });
  const metadata = {
    title: "Wiki Query",
    description: "A single saved query page from wiki/queries/ \u2014 frontmatter plus body.",
    mimeType: "application/json"
  };
  const readQuery = async (uri, variables) => {
    // The template variable is coerced to a string before it is used as a slug.
    const page = await loadPageWithMeta(root, QUERIES_DIR, String(variables.slug));
    return { contents: [jsonContent(uri, page)] };
  };
  server.registerResource("wiki-query", template, metadata, readQuery);
}
|
|
2633
|
+
/**
 * Builds one record per Markdown file in the sources directory.
 * Each record is the file name followed by the frontmatter fields returned by
 * parseFrontmatter(). If the directory cannot be read, an empty list is returned.
 *
 * @param root Project root directory.
 * @returns Array of `{ filename, ...meta }` records, in directory-listing order.
 */
async function listSources(root) {
  const sourcesDir = path18.join(root, SOURCES_DIR);
  let entries;
  try {
    entries = await readdir8(sourcesDir);
  } catch {
    // An unreadable sources directory is reported as "no sources".
    return [];
  }
  const markdownFiles = entries.filter((name) => name.endsWith(".md"));
  const records = [];
  for (const filename of markdownFiles) {
    const raw = await safeReadFile(path18.join(sourcesDir, filename));
    const parsed = parseFrontmatter(raw);
    records.push({ filename, ...parsed.meta });
  }
  return records;
}
|
|
2649
|
+
/**
 * Loads a single wiki page and splits it into frontmatter and body.
 *
 * The slug is supplied by MCP clients (via the resource URI), so it is treated
 * as untrusted: a slug that resolves outside `<root>/<dir>` is rejected before
 * any file is read.
 *
 * @param root Project root directory.
 * @param dir  Pages directory relative to root (e.g. CONCEPTS_DIR).
 * @param slug Page name without the ".md" extension.
 * @returns `{ slug, meta, body }` where body is trimmed.
 * @throws Error "Invalid page slug" when the slug escapes the pages directory.
 * @throws Error "Page not found" when the file is missing or empty.
 */
async function loadPageWithMeta(root, dir, slug) {
  const filePath = path18.join(root, dir, `${slug}.md`);
  // Containment check: the resolved target must stay inside the pages directory.
  const baseDir = path18.resolve(root, dir);
  const relative = path18.relative(baseDir, path18.resolve(filePath));
  const escapesBase = relative === "" || relative === ".." || relative.startsWith(`..${path18.sep}`) || path18.isAbsolute(relative);
  if (escapesBase) {
    throw new Error(`Invalid page slug: ${slug}`);
  }
  const content = await safeReadFile(filePath);
  if (!content) {
    throw new Error(`Page not found: ${dir}/${slug}.md`);
  }
  const { meta, body } = parseFrontmatter(content);
  return { slug, meta, body: body.trim() };
}
|
|
2658
|
+
/**
 * Enumerates the Markdown pages in a wiki directory as resource descriptors.
 * If the directory cannot be read, an empty resource list is returned.
 *
 * @param root   Project root directory.
 * @param dir    Pages directory relative to root.
 * @param scheme Path segment placed after "llmwiki://" in each resource URI.
 * @returns `{ resources }` where each entry is `{ uri, name }` for one page.
 */
async function listPagesUnder(root, dir, scheme) {
  let entries;
  try {
    entries = await readdir8(path18.join(root, dir));
  } catch {
    return { resources: [] };
  }
  const extension = ".md";
  const resources = [];
  for (const entry of entries) {
    if (!entry.endsWith(extension)) {
      continue;
    }
    // Strip the trailing ".md" to obtain the page slug.
    const slug = entry.slice(0, -extension.length);
    resources.push({ uri: `llmwiki://${scheme}/${slug}`, name: slug });
  }
  return { resources };
}
|
|
2672
|
+
|
|
2673
|
+
// src/mcp/server.ts
|
|
2674
|
+
/**
 * Creates the "llmwiki" MCP server, registers its tools and resources, and
 * connects it to a stdio transport.
 *
 * @param options `{ root, version }`: project root directory and the version
 *                string reported by the server.
 */
async function startMCPServer(options) {
  const serverInfo = { name: "llmwiki", version: options.version };
  const serverOptions = {
    instructions: "llmwiki is a knowledge compiler. Use ingest_source to add raw sources, compile_wiki to run the LLM pipeline, query_wiki for grounded answers, and search_pages to retrieve relevant pages. read_page, lint_wiki, and wiki_status work without an API key."
  };
  const server = new McpServer2(serverInfo, serverOptions);
  registerWikiTools(server, options.root);
  registerWikiResources(server, options.root);
  await server.connect(new StdioServerTransport());
}
|
|
2684
|
+
|
|
1366
2685
|
// src/cli.ts
|
|
1367
2686
|
// Load the package version from package.json through a CommonJS-style require
// created for this ES module.
var require2 = createRequire(import.meta.url);
var version = require2("../package.json").version;
|
|
@@ -1377,8 +2696,8 @@ program.command("ingest <source>").description("Ingest a URL or local file into
|
|
|
1377
2696
|
}
|
|
1378
2697
|
});
|
|
1379
2698
|
program.command("compile").description("Compile sources/ into an interlinked wiki").action(async () => {
|
|
1380
|
-
requireApiKey();
|
|
1381
2699
|
try {
|
|
2700
|
+
requireProvider();
|
|
1382
2701
|
await compileCommand();
|
|
1383
2702
|
} catch (err) {
|
|
1384
2703
|
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
@@ -1386,8 +2705,8 @@ program.command("compile").description("Compile sources/ into an interlinked wik
|
|
|
1386
2705
|
}
|
|
1387
2706
|
});
|
|
1388
2707
|
program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").action(async (question, options) => {
|
|
1389
|
-
requireApiKey();
|
|
1390
2708
|
try {
|
|
2709
|
+
requireProvider();
|
|
1391
2710
|
await queryCommand(process.cwd(), question, options);
|
|
1392
2711
|
} catch (err) {
|
|
1393
2712
|
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
@@ -1395,21 +2714,64 @@ program.command("query <question>").description("Ask a question against the wiki
|
|
|
1395
2714
|
}
|
|
1396
2715
|
});
|
|
1397
2716
|
// "watch" command: checks provider credentials, then runs watchCommand().
// Any thrown error is printed in red and the process exits with status 1.
program
  .command("watch")
  .description("Watch sources/ and auto-recompile on changes")
  .action(async () => {
    try {
      requireProvider();
      await watchCommand();
    } catch (err) {
      const detail = err instanceof Error ? err.message : err;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
1406
|
-
program.
|
|
1407
|
-
|
|
1408
|
-
|
|
2725
|
+
// "lint" command: runs lintCommand() without a provider credential check.
// Any thrown error is printed in red and the process exits with status 1.
program
  .command("lint")
  .description("Run rule-based quality checks against the wiki")
  .action(async () => {
    try {
      await lintCommand();
    } catch (err) {
      const detail = err instanceof Error ? err.message : err;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
2733
|
+
// "serve" command: starts the MCP server for the directory given by --root
// (defaults to the current working directory at registration time).
// Any thrown error is printed in red and the process exits with status 1.
program
  .command("serve")
  .description("Start an MCP server exposing wiki tools and resources over stdio")
  .option("--root <dir>", "Project root directory", process.cwd())
  .action(async (options) => {
    try {
      await startMCPServer({ root: options.root, version });
    } catch (err) {
      const detail = err instanceof Error ? err.message : err;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
2741
|
+
// Environment variable that must hold the API key for each supported provider.
// A null entry means the provider is accepted without any key.
var PROVIDER_KEY_VARS2 = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  ollama: null,
  minimax: "MINIMAX_API_KEY"
};
/**
 * Verifies that the selected LLM provider is supported and has credentials.
 *
 * The provider comes from LLMWIKI_PROVIDER, falling back to DEFAULT_PROVIDER.
 * On any failure an error is printed to stderr and the process exits with
 * status 1; on success the function simply returns.
 *
 * - "anthropic" accepts either an API key or an auth token, as resolved by
 *   resolveAnthropicAuthFromEnv().
 * - Other providers are looked up in PROVIDER_KEY_VARS2 using an own-property
 *   check, so names inherited from Object.prototype ("toString",
 *   "constructor", "__proto__", ...) are reported as unknown providers instead
 *   of being mistaken for a key variable.
 */
function requireProvider() {
  const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
  if (provider === "anthropic") {
    const auth = resolveAnthropicAuthFromEnv();
    if (!auth.apiKey && !auth.authToken) {
      console.error(
        `\x1B[31mError:\x1B[0m Anthropic credentials are required for the "anthropic" provider.
Set one of: export ANTHROPIC_API_KEY=<your-key> OR export ANTHROPIC_AUTH_TOKEN=<your-token>`
      );
      process.exit(1);
    }
    return;
  }
  // Own-property check: a plain `table[provider] === undefined` test would let
  // inherited prototype members through as if they were configured providers.
  const isKnownProvider = Object.prototype.hasOwnProperty.call(PROVIDER_KEY_VARS2, provider);
  if (!isKnownProvider) {
    console.error(
      `\x1B[31mError:\x1B[0m Unknown provider "${provider}".
Supported: ${Object.keys(PROVIDER_KEY_VARS2).join(", ")}`
    );
    process.exit(1);
  }
  const keyVar = PROVIDER_KEY_VARS2[provider];
  if (keyVar && !process.env[keyVar]) {
    console.error(
      `\x1B[31mError:\x1B[0m ${keyVar} environment variable is required for the "${provider}" provider.
Set it with: export ${keyVar}=<your-key>`
    );
    process.exit(1);
  }
}
|
|
2776
|
+
// Entry point: every command has been registered on `program` by this line.
// NOTE(review): presumably parses process.argv and runs the matching action; confirm against the CLI library.
program.parse();
|
|
1415
2777
|
//# sourceMappingURL=cli.js.map
|