llm-wiki-compiler 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +115 -9
- package/dist/cli.js +1599 -224
- package/dist/cli.js.map +1 -1
- package/package.json +5 -2
package/dist/cli.js
CHANGED
|
@@ -66,7 +66,14 @@ var COMPILE_CONCURRENCY = 5;
|
|
|
66
66
|
var RETRY_COUNT = 3;
|
|
67
67
|
var RETRY_BASE_MS = 1e3;
|
|
68
68
|
var RETRY_MULTIPLIER = 4;
|
|
69
|
-
var
|
|
69
|
+
var DEFAULT_PROVIDER = "anthropic";
|
|
70
|
+
var PROVIDER_MODELS = {
|
|
71
|
+
anthropic: "claude-sonnet-4-20250514",
|
|
72
|
+
openai: "gpt-4o",
|
|
73
|
+
ollama: "llama3.1",
|
|
74
|
+
minimax: "MiniMax-M2.7"
|
|
75
|
+
};
|
|
76
|
+
var OLLAMA_DEFAULT_HOST = "http://localhost:11434/v1";
|
|
70
77
|
var SOURCES_DIR = "sources";
|
|
71
78
|
var CONCEPTS_DIR = "wiki/concepts";
|
|
72
79
|
var QUERIES_DIR = "wiki/queries";
|
|
@@ -74,6 +81,14 @@ var LLMWIKI_DIR = ".llmwiki";
|
|
|
74
81
|
var STATE_FILE = ".llmwiki/state.json";
|
|
75
82
|
var LOCK_FILE = ".llmwiki/lock";
|
|
76
83
|
var INDEX_FILE = "wiki/index.md";
|
|
84
|
+
var MOC_FILE = "wiki/MOC.md";
|
|
85
|
+
var EMBEDDINGS_FILE = ".llmwiki/embeddings.json";
|
|
86
|
+
var EMBEDDING_TOP_K = 15;
|
|
87
|
+
var EMBEDDING_MODELS = {
|
|
88
|
+
anthropic: "voyage-3-lite",
|
|
89
|
+
openai: "text-embedding-3-small",
|
|
90
|
+
ollama: "nomic-embed-text"
|
|
91
|
+
};
|
|
77
92
|
|
|
78
93
|
// src/utils/output.ts
|
|
79
94
|
var RESET = "\x1B[0m";
|
|
@@ -234,26 +249,36 @@ async function saveSource(title, document) {
|
|
|
234
249
|
await writeFile2(destPath, document, "utf-8");
|
|
235
250
|
return destPath;
|
|
236
251
|
}
|
|
237
|
-
async function
|
|
252
|
+
async function ingestSource(source2) {
|
|
238
253
|
status("*", info(`Ingesting: ${source2}`));
|
|
239
254
|
const { title, content } = isUrl(source2) ? await ingestWeb(source2) : await ingestFile(source2);
|
|
240
255
|
const result = enforceCharLimit(content);
|
|
241
256
|
enforceMinContent(result.content);
|
|
242
257
|
const document = buildDocument(title, source2, result);
|
|
243
258
|
const savedPath = await saveSource(title, document);
|
|
259
|
+
return {
|
|
260
|
+
filename: path3.basename(savedPath),
|
|
261
|
+
charCount: result.content.length,
|
|
262
|
+
truncated: result.truncated,
|
|
263
|
+
source: source2
|
|
264
|
+
};
|
|
265
|
+
}
|
|
266
|
+
async function ingest(source2) {
|
|
267
|
+
const result = await ingestSource(source2);
|
|
268
|
+
const savedPath = path3.join(SOURCES_DIR, result.filename);
|
|
244
269
|
status(
|
|
245
270
|
"+",
|
|
246
|
-
success(`Saved ${bold(
|
|
271
|
+
success(`Saved ${bold(result.filename)} \u2192 ${source(savedPath)}`)
|
|
247
272
|
);
|
|
248
273
|
status("\u2192", dim("Next: llmwiki compile"));
|
|
249
274
|
}
|
|
250
275
|
|
|
251
276
|
// src/commands/compile.ts
|
|
252
|
-
import { existsSync as
|
|
277
|
+
import { existsSync as existsSync4 } from "fs";
|
|
253
278
|
|
|
254
279
|
// src/compiler/index.ts
|
|
255
|
-
import { readFile as
|
|
256
|
-
import
|
|
280
|
+
import { readFile as readFile8, readdir as readdir6 } from "fs/promises";
|
|
281
|
+
import path13 from "path";
|
|
257
282
|
|
|
258
283
|
// src/utils/state.ts
|
|
259
284
|
import { readFile as readFile3, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
|
|
@@ -296,30 +321,387 @@ async function removeSourceState(root, sourceFile) {
|
|
|
296
321
|
await writeState(root, state);
|
|
297
322
|
}
|
|
298
323
|
|
|
299
|
-
// src/
|
|
324
|
+
// src/providers/anthropic.ts
|
|
300
325
|
import Anthropic from "@anthropic-ai/sdk";
|
|
301
|
-
var
|
|
302
|
-
function
|
|
303
|
-
|
|
304
|
-
|
|
326
|
+
var VOYAGE_EMBEDDINGS_URL = "https://api.voyageai.com/v1/embeddings";
|
|
327
|
+
function buildAnthropicClientOptions(options = {}) {
|
|
328
|
+
const trimmedBaseURL = options.baseURL?.trim();
|
|
329
|
+
const trimmedApiKey = options.apiKey?.trim();
|
|
330
|
+
const trimmedAuthToken = options.authToken?.trim();
|
|
331
|
+
const result = {};
|
|
332
|
+
if (trimmedApiKey) {
|
|
333
|
+
result.apiKey = trimmedApiKey;
|
|
334
|
+
}
|
|
335
|
+
if (trimmedAuthToken) {
|
|
336
|
+
result.authToken = trimmedAuthToken;
|
|
337
|
+
}
|
|
338
|
+
if (!trimmedBaseURL) {
|
|
339
|
+
return result;
|
|
340
|
+
}
|
|
341
|
+
const normalizedBaseURL = trimmedBaseURL.endsWith("/") && trimmedBaseURL.length > 1 ? trimmedBaseURL.slice(0, -1) : trimmedBaseURL;
|
|
342
|
+
result.baseURL = normalizedBaseURL;
|
|
343
|
+
return result;
|
|
344
|
+
}
|
|
345
|
+
var AnthropicProvider = class {
|
|
346
|
+
client;
|
|
347
|
+
model;
|
|
348
|
+
constructor(model, options = {}) {
|
|
349
|
+
this.model = model;
|
|
350
|
+
this.client = new Anthropic(buildAnthropicClientOptions(options));
|
|
351
|
+
}
|
|
352
|
+
/** Send a single non-streaming completion request. */
|
|
353
|
+
async complete(system, messages, maxTokens) {
|
|
354
|
+
const response = await this.client.messages.create({
|
|
355
|
+
model: this.model,
|
|
356
|
+
max_tokens: maxTokens,
|
|
357
|
+
system,
|
|
358
|
+
messages
|
|
359
|
+
});
|
|
360
|
+
const textBlock = response.content.find((block) => block.type === "text");
|
|
361
|
+
return textBlock?.type === "text" ? textBlock.text : "";
|
|
362
|
+
}
|
|
363
|
+
/** Stream a completion, invoking onToken for each text chunk. */
|
|
364
|
+
async stream(system, messages, maxTokens, onToken) {
|
|
365
|
+
const stream = this.client.messages.stream({
|
|
366
|
+
model: this.model,
|
|
367
|
+
max_tokens: maxTokens,
|
|
368
|
+
system,
|
|
369
|
+
messages
|
|
370
|
+
});
|
|
371
|
+
let fullText = "";
|
|
372
|
+
for await (const event of stream) {
|
|
373
|
+
if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
|
|
374
|
+
fullText += event.delta.text;
|
|
375
|
+
onToken?.(event.delta.text);
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
return fullText;
|
|
379
|
+
}
|
|
380
|
+
/** Call Claude with tool definitions and return the parsed tool input as JSON. */
|
|
381
|
+
async toolCall(system, messages, tools, maxTokens) {
|
|
382
|
+
const anthropicTools = tools.map((t) => ({
|
|
383
|
+
name: t.name,
|
|
384
|
+
description: t.description,
|
|
385
|
+
input_schema: t.input_schema
|
|
386
|
+
}));
|
|
387
|
+
const response = await this.client.messages.create({
|
|
388
|
+
model: this.model,
|
|
389
|
+
max_tokens: maxTokens,
|
|
390
|
+
system,
|
|
391
|
+
messages,
|
|
392
|
+
tools: anthropicTools
|
|
393
|
+
});
|
|
394
|
+
const toolBlock = response.content.find((block) => block.type === "tool_use");
|
|
395
|
+
if (toolBlock?.type === "tool_use") {
|
|
396
|
+
return JSON.stringify(toolBlock.input);
|
|
397
|
+
}
|
|
398
|
+
const textBlock = response.content.find((block) => block.type === "text");
|
|
399
|
+
return textBlock?.type === "text" ? textBlock.text : "";
|
|
400
|
+
}
|
|
401
|
+
/**
|
|
402
|
+
* Produce a single embedding vector via the Voyage API.
|
|
403
|
+
*
|
|
404
|
+
* Anthropic does not ship a first-party embeddings endpoint, so we delegate
|
|
405
|
+
* to Voyage (their recommended partner). Requires VOYAGE_API_KEY.
|
|
406
|
+
*/
|
|
407
|
+
async embed(text) {
|
|
408
|
+
const apiKey = process.env.VOYAGE_API_KEY?.trim();
|
|
409
|
+
if (!apiKey) {
|
|
410
|
+
throw new Error(
|
|
411
|
+
"VOYAGE_API_KEY is not set. Anthropic embeddings use Voyage \u2014 set VOYAGE_API_KEY to enable semantic search."
|
|
412
|
+
);
|
|
413
|
+
}
|
|
414
|
+
const response = await fetch(VOYAGE_EMBEDDINGS_URL, {
|
|
415
|
+
method: "POST",
|
|
416
|
+
headers: {
|
|
417
|
+
"Content-Type": "application/json",
|
|
418
|
+
Authorization: `Bearer ${apiKey}`
|
|
419
|
+
},
|
|
420
|
+
body: JSON.stringify({ input: text, model: EMBEDDING_MODELS.anthropic })
|
|
421
|
+
});
|
|
422
|
+
if (!response.ok) {
|
|
423
|
+
const detail = await response.text();
|
|
424
|
+
throw new Error(`Voyage embeddings request failed (${response.status}): ${detail}`);
|
|
425
|
+
}
|
|
426
|
+
const json = await response.json();
|
|
427
|
+
const vector = json.data?.[0]?.embedding;
|
|
428
|
+
if (!Array.isArray(vector)) {
|
|
429
|
+
throw new Error("Voyage embeddings response did not include a vector.");
|
|
430
|
+
}
|
|
431
|
+
return vector;
|
|
432
|
+
}
|
|
433
|
+
};
|
|
434
|
+
|
|
435
|
+
// src/providers/openai.ts
|
|
436
|
+
import OpenAI from "openai";
|
|
437
|
+
function translateToolToOpenAI(tool) {
|
|
438
|
+
return {
|
|
439
|
+
type: "function",
|
|
440
|
+
function: {
|
|
441
|
+
name: tool.name,
|
|
442
|
+
description: tool.description,
|
|
443
|
+
parameters: tool.input_schema
|
|
444
|
+
}
|
|
445
|
+
};
|
|
446
|
+
}
|
|
447
|
+
var OpenAIProvider = class {
|
|
448
|
+
client;
|
|
449
|
+
model;
|
|
450
|
+
constructor(model, baseURL, apiKey) {
|
|
451
|
+
this.model = model;
|
|
452
|
+
const resolvedKey = apiKey ?? process.env.OPENAI_API_KEY ?? "";
|
|
453
|
+
this.client = new OpenAI({
|
|
454
|
+
apiKey: resolvedKey,
|
|
455
|
+
...baseURL ? { baseURL } : {}
|
|
456
|
+
});
|
|
457
|
+
}
|
|
458
|
+
/** Send a single non-streaming completion request. */
|
|
459
|
+
async complete(system, messages, maxTokens) {
|
|
460
|
+
const response = await this.client.chat.completions.create({
|
|
461
|
+
model: this.model,
|
|
462
|
+
max_tokens: maxTokens,
|
|
463
|
+
messages: [{ role: "system", content: system }, ...messages]
|
|
464
|
+
});
|
|
465
|
+
return response.choices[0]?.message?.content ?? "";
|
|
466
|
+
}
|
|
467
|
+
/** Stream a completion, invoking onToken for each text chunk. */
|
|
468
|
+
async stream(system, messages, maxTokens, onToken) {
|
|
469
|
+
const stream = await this.client.chat.completions.create({
|
|
470
|
+
model: this.model,
|
|
471
|
+
max_tokens: maxTokens,
|
|
472
|
+
messages: [{ role: "system", content: system }, ...messages],
|
|
473
|
+
stream: true
|
|
474
|
+
});
|
|
475
|
+
let fullText = "";
|
|
476
|
+
for await (const chunk of stream) {
|
|
477
|
+
const delta = chunk.choices[0]?.delta?.content;
|
|
478
|
+
if (delta) {
|
|
479
|
+
fullText += delta;
|
|
480
|
+
onToken?.(delta);
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
return fullText;
|
|
484
|
+
}
|
|
485
|
+
/** Call the model with tool definitions and return the parsed tool input as JSON. */
|
|
486
|
+
async toolCall(system, messages, tools, maxTokens) {
|
|
487
|
+
const openaiTools = tools.map(translateToolToOpenAI);
|
|
488
|
+
const response = await this.client.chat.completions.create({
|
|
489
|
+
model: this.model,
|
|
490
|
+
max_tokens: maxTokens,
|
|
491
|
+
messages: [{ role: "system", content: system }, ...messages],
|
|
492
|
+
tools: openaiTools
|
|
493
|
+
});
|
|
494
|
+
const toolCalls = response.choices[0]?.message?.tool_calls;
|
|
495
|
+
if (toolCalls && toolCalls.length > 0) {
|
|
496
|
+
return toolCalls[0].function.arguments;
|
|
497
|
+
}
|
|
498
|
+
return response.choices[0]?.message?.content ?? "";
|
|
499
|
+
}
|
|
500
|
+
/**
|
|
501
|
+
* Produce a single embedding vector via the OpenAI embeddings API.
|
|
502
|
+
* Subclasses (e.g. Ollama) override embeddingModel() to pick a different model.
|
|
503
|
+
*/
|
|
504
|
+
async embed(text) {
|
|
505
|
+
const response = await this.client.embeddings.create({
|
|
506
|
+
model: this.embeddingModel(),
|
|
507
|
+
input: text
|
|
508
|
+
});
|
|
509
|
+
const vector = response.data[0]?.embedding;
|
|
510
|
+
if (!Array.isArray(vector)) {
|
|
511
|
+
throw new Error("OpenAI embeddings response did not include a vector.");
|
|
512
|
+
}
|
|
513
|
+
return vector;
|
|
514
|
+
}
|
|
515
|
+
/** Default embedding model for this provider. Subclasses may override. */
|
|
516
|
+
embeddingModel() {
|
|
517
|
+
return EMBEDDING_MODELS.openai;
|
|
518
|
+
}
|
|
519
|
+
};
|
|
520
|
+
|
|
521
|
+
// src/providers/ollama.ts
|
|
522
|
+
var OllamaProvider = class extends OpenAIProvider {
|
|
523
|
+
constructor(model, baseURL) {
|
|
524
|
+
super(model, baseURL, "ollama");
|
|
525
|
+
}
|
|
526
|
+
/** Ollama ships a dedicated embedding model (nomic-embed-text). */
|
|
527
|
+
embeddingModel() {
|
|
528
|
+
return EMBEDDING_MODELS.ollama;
|
|
529
|
+
}
|
|
530
|
+
};
|
|
531
|
+
|
|
532
|
+
// src/providers/minimax.ts
|
|
533
|
+
var MINIMAX_BASE_URL = "https://api.minimax.io/v1";
|
|
534
|
+
var MiniMaxProvider = class extends OpenAIProvider {
|
|
535
|
+
constructor(model, apiKey) {
|
|
536
|
+
super(model, MINIMAX_BASE_URL, apiKey);
|
|
537
|
+
}
|
|
538
|
+
};
|
|
539
|
+
|
|
540
|
+
// src/utils/claude-settings.ts
|
|
541
|
+
import { readFileSync } from "fs";
|
|
542
|
+
import { homedir } from "os";
|
|
543
|
+
import path5 from "path";
|
|
544
|
+
var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
|
|
545
|
+
function isRecord(value) {
|
|
546
|
+
return typeof value === "object" && value !== null;
|
|
547
|
+
}
|
|
548
|
+
function normalize(value) {
|
|
549
|
+
if (typeof value !== "string") return void 0;
|
|
550
|
+
const trimmed = value.trim();
|
|
551
|
+
return trimmed.length > 0 ? trimmed : void 0;
|
|
552
|
+
}
|
|
553
|
+
function resolveClaudeSettingsPath(env) {
|
|
554
|
+
return env[CLAUDE_SETTINGS_PATH_ENV] ?? path5.join(homedir(), ".claude", "settings.json");
|
|
555
|
+
}
|
|
556
|
+
function readClaudeSettingsFile(settingsPath) {
|
|
557
|
+
try {
|
|
558
|
+
return readFileSync(settingsPath, "utf8");
|
|
559
|
+
} catch (err) {
|
|
560
|
+
if (isRecord(err) && err.code === "ENOENT") {
|
|
561
|
+
return void 0;
|
|
562
|
+
}
|
|
563
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
564
|
+
throw new Error(`Failed to read Claude settings at "${settingsPath}": ${message}`);
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
function readClaudeSettingsEnv(env = process.env) {
|
|
568
|
+
const settingsPath = resolveClaudeSettingsPath(env);
|
|
569
|
+
const raw = readClaudeSettingsFile(settingsPath);
|
|
570
|
+
if (!raw) return void 0;
|
|
571
|
+
let parsed;
|
|
572
|
+
try {
|
|
573
|
+
parsed = JSON.parse(raw);
|
|
574
|
+
} catch (err) {
|
|
575
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
576
|
+
throw new Error(`Failed to parse Claude settings at "${settingsPath}": ${message}`);
|
|
577
|
+
}
|
|
578
|
+
if (!isRecord(parsed) || !isRecord(parsed.env)) {
|
|
579
|
+
return void 0;
|
|
305
580
|
}
|
|
306
|
-
|
|
581
|
+
const values = {
|
|
582
|
+
ANTHROPIC_API_KEY: normalize(parsed.env.ANTHROPIC_API_KEY),
|
|
583
|
+
ANTHROPIC_AUTH_TOKEN: normalize(parsed.env.ANTHROPIC_AUTH_TOKEN),
|
|
584
|
+
ANTHROPIC_BASE_URL: normalize(parsed.env.ANTHROPIC_BASE_URL),
|
|
585
|
+
ANTHROPIC_MODEL: normalize(parsed.env.ANTHROPIC_MODEL)
|
|
586
|
+
};
|
|
587
|
+
if (!values.ANTHROPIC_API_KEY && !values.ANTHROPIC_AUTH_TOKEN && !values.ANTHROPIC_BASE_URL && !values.ANTHROPIC_MODEL) {
|
|
588
|
+
return void 0;
|
|
589
|
+
}
|
|
590
|
+
return values;
|
|
591
|
+
}
|
|
592
|
+
function tryReadClaudeSettingsEnv(env) {
|
|
593
|
+
try {
|
|
594
|
+
return readClaudeSettingsEnv(env);
|
|
595
|
+
} catch {
|
|
596
|
+
return void 0;
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
function validateAnthropicBaseURL(value) {
|
|
600
|
+
const normalized = value.trim();
|
|
601
|
+
try {
|
|
602
|
+
const parsed = new URL(normalized);
|
|
603
|
+
if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
|
|
604
|
+
throw new Error("Must use http:// or https:// protocol.");
|
|
605
|
+
}
|
|
606
|
+
} catch (err) {
|
|
607
|
+
const message = err instanceof Error ? err.message : "Must be a valid http(s) URL.";
|
|
608
|
+
throw new Error(`Invalid ANTHROPIC_BASE_URL: "${normalized}". ${message}`);
|
|
609
|
+
}
|
|
610
|
+
return normalized;
|
|
611
|
+
}
|
|
612
|
+
function resolveAnthropicAuthFromEnv(env = process.env) {
|
|
613
|
+
const explicitApiKey = normalize(env.ANTHROPIC_API_KEY);
|
|
614
|
+
if (explicitApiKey) return { apiKey: explicitApiKey };
|
|
615
|
+
const explicitAuthToken = normalize(env.ANTHROPIC_AUTH_TOKEN);
|
|
616
|
+
if (explicitAuthToken) return { authToken: explicitAuthToken };
|
|
617
|
+
const fallback = readClaudeSettingsEnv(env);
|
|
618
|
+
if (fallback?.ANTHROPIC_API_KEY) return { apiKey: fallback.ANTHROPIC_API_KEY };
|
|
619
|
+
if (fallback?.ANTHROPIC_AUTH_TOKEN) return { authToken: fallback.ANTHROPIC_AUTH_TOKEN };
|
|
620
|
+
return {};
|
|
621
|
+
}
|
|
622
|
+
function resolveAnthropicModelFromEnv(env = process.env) {
|
|
623
|
+
const explicitModel = env.LLMWIKI_MODEL;
|
|
624
|
+
if (explicitModel !== void 0) return explicitModel;
|
|
625
|
+
return tryReadClaudeSettingsEnv(env)?.ANTHROPIC_MODEL;
|
|
626
|
+
}
|
|
627
|
+
function resolveAnthropicBaseURLFromEnv(env = process.env) {
|
|
628
|
+
const explicitBaseURL = normalize(env.ANTHROPIC_BASE_URL);
|
|
629
|
+
if (explicitBaseURL) return validateAnthropicBaseURL(explicitBaseURL);
|
|
630
|
+
const fallbackBaseURL = tryReadClaudeSettingsEnv(env)?.ANTHROPIC_BASE_URL;
|
|
631
|
+
if (!fallbackBaseURL) return void 0;
|
|
632
|
+
return validateAnthropicBaseURL(fallbackBaseURL);
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
// src/utils/provider.ts
|
|
636
|
+
var SUPPORTED_PROVIDERS = /* @__PURE__ */ new Set(["anthropic", "openai", "ollama", "minimax"]);
|
|
637
|
+
function getProvider() {
|
|
638
|
+
const providerName = getProviderName();
|
|
639
|
+
switch (providerName) {
|
|
640
|
+
case "anthropic":
|
|
641
|
+
return getAnthropicProvider();
|
|
642
|
+
case "openai":
|
|
643
|
+
return new OpenAIProvider(getModelForProvider("openai"));
|
|
644
|
+
case "ollama":
|
|
645
|
+
return new OllamaProvider(
|
|
646
|
+
getModelForProvider("ollama"),
|
|
647
|
+
process.env.OLLAMA_HOST ?? OLLAMA_DEFAULT_HOST
|
|
648
|
+
);
|
|
649
|
+
case "minimax":
|
|
650
|
+
return getMiniMaxProvider();
|
|
651
|
+
default:
|
|
652
|
+
throw new Error(`Unhandled provider: ${providerName}`);
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
function getModelForProvider(providerName) {
|
|
656
|
+
return process.env.LLMWIKI_MODEL ?? PROVIDER_MODELS[providerName];
|
|
657
|
+
}
|
|
658
|
+
function getMiniMaxProvider() {
|
|
659
|
+
const apiKey = process.env.MINIMAX_API_KEY;
|
|
660
|
+
if (!apiKey) {
|
|
661
|
+
throw new Error(
|
|
662
|
+
"MiniMax provider requires MINIMAX_API_KEY environment variable.\n Set it with: export MINIMAX_API_KEY=your_key"
|
|
663
|
+
);
|
|
664
|
+
}
|
|
665
|
+
return new MiniMaxProvider(getModelForProvider("minimax"), apiKey);
|
|
666
|
+
}
|
|
667
|
+
function getAnthropicProvider() {
|
|
668
|
+
const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
|
|
669
|
+
const baseURL = resolveAnthropicBaseURLFromEnv();
|
|
670
|
+
const auth = resolveAnthropicAuthFromEnv();
|
|
671
|
+
return new AnthropicProvider(model, {
|
|
672
|
+
baseURL,
|
|
673
|
+
...auth
|
|
674
|
+
});
|
|
675
|
+
}
|
|
676
|
+
function getProviderName() {
|
|
677
|
+
const providerName = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
|
|
678
|
+
if (!SUPPORTED_PROVIDERS.has(providerName)) {
|
|
679
|
+
throw new Error(
|
|
680
|
+
`Unknown provider "${providerName}". Supported: ${[...SUPPORTED_PROVIDERS].join(", ")}`
|
|
681
|
+
);
|
|
682
|
+
}
|
|
683
|
+
return providerName;
|
|
684
|
+
}
|
|
685
|
+
function getActiveProviderName() {
|
|
686
|
+
return getProviderName();
|
|
307
687
|
}
|
|
688
|
+
|
|
689
|
+
// src/utils/llm.ts
|
|
308
690
|
function sleep(ms) {
|
|
309
691
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
310
692
|
}
|
|
311
693
|
async function callClaude(options) {
|
|
312
694
|
const { system, messages, tools, maxTokens = 4096, stream = false, onToken } = options;
|
|
313
|
-
const
|
|
695
|
+
const provider = getProvider();
|
|
314
696
|
for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
|
|
315
697
|
try {
|
|
316
698
|
if (stream) {
|
|
317
|
-
return await
|
|
699
|
+
return await provider.stream(system, messages, maxTokens, onToken);
|
|
318
700
|
}
|
|
319
701
|
if (tools && tools.length > 0) {
|
|
320
|
-
return await
|
|
702
|
+
return await provider.toolCall(system, messages, tools, maxTokens);
|
|
321
703
|
}
|
|
322
|
-
return await
|
|
704
|
+
return await provider.complete(system, messages, maxTokens);
|
|
323
705
|
} catch (error2) {
|
|
324
706
|
if (attempt === RETRY_COUNT) throw error2;
|
|
325
707
|
const delayMs = RETRY_BASE_MS * Math.pow(RETRY_MULTIPLIER, attempt);
|
|
@@ -331,57 +713,10 @@ async function callClaude(options) {
|
|
|
331
713
|
}
|
|
332
714
|
throw new Error("Unreachable");
|
|
333
715
|
}
|
|
334
|
-
async function callClaudeStreaming(anthropic, system, messages, maxTokens, onToken) {
|
|
335
|
-
const stream = anthropic.messages.stream({
|
|
336
|
-
model: MODEL,
|
|
337
|
-
max_tokens: maxTokens,
|
|
338
|
-
system,
|
|
339
|
-
messages
|
|
340
|
-
});
|
|
341
|
-
let fullText = "";
|
|
342
|
-
for await (const event of stream) {
|
|
343
|
-
if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
|
|
344
|
-
fullText += event.delta.text;
|
|
345
|
-
onToken?.(event.delta.text);
|
|
346
|
-
}
|
|
347
|
-
}
|
|
348
|
-
return fullText;
|
|
349
|
-
}
|
|
350
|
-
async function callClaudeToolUse(anthropic, system, messages, tools, maxTokens) {
|
|
351
|
-
const response = await anthropic.messages.create({
|
|
352
|
-
model: MODEL,
|
|
353
|
-
max_tokens: maxTokens,
|
|
354
|
-
system,
|
|
355
|
-
messages,
|
|
356
|
-
tools
|
|
357
|
-
});
|
|
358
|
-
const toolBlock = response.content.find((block) => block.type === "tool_use");
|
|
359
|
-
if (toolBlock && toolBlock.type === "tool_use") {
|
|
360
|
-
return JSON.stringify(toolBlock.input);
|
|
361
|
-
}
|
|
362
|
-
const textBlock = response.content.find((block) => block.type === "text");
|
|
363
|
-
if (textBlock && textBlock.type === "text") {
|
|
364
|
-
return textBlock.text;
|
|
365
|
-
}
|
|
366
|
-
return "";
|
|
367
|
-
}
|
|
368
|
-
async function callClaudeBasic(anthropic, system, messages, maxTokens) {
|
|
369
|
-
const response = await anthropic.messages.create({
|
|
370
|
-
model: MODEL,
|
|
371
|
-
max_tokens: maxTokens,
|
|
372
|
-
system,
|
|
373
|
-
messages
|
|
374
|
-
});
|
|
375
|
-
const textBlock = response.content.find((block) => block.type === "text");
|
|
376
|
-
if (textBlock && textBlock.type === "text") {
|
|
377
|
-
return textBlock.text;
|
|
378
|
-
}
|
|
379
|
-
return "";
|
|
380
|
-
}
|
|
381
716
|
|
|
382
717
|
// src/utils/lock.ts
|
|
383
718
|
import { open, readFile as readFile4, unlink, mkdir as mkdir4 } from "fs/promises";
|
|
384
|
-
import
|
|
719
|
+
import path6 from "path";
|
|
385
720
|
var RECLAIM_SUFFIX = ".reclaim";
|
|
386
721
|
var MAX_ACQUIRE_ATTEMPTS = 2;
|
|
387
722
|
function isProcessAlive(pid) {
|
|
@@ -393,8 +728,8 @@ function isProcessAlive(pid) {
|
|
|
393
728
|
}
|
|
394
729
|
}
|
|
395
730
|
async function acquireLock(root) {
|
|
396
|
-
const lockPath =
|
|
397
|
-
await mkdir4(
|
|
731
|
+
const lockPath = path6.join(root, LOCK_FILE);
|
|
732
|
+
await mkdir4(path6.join(root, LLMWIKI_DIR), { recursive: true });
|
|
398
733
|
for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
|
|
399
734
|
const created = await tryCreateLock(lockPath);
|
|
400
735
|
if (created) return true;
|
|
@@ -466,7 +801,7 @@ async function isLockStale(lockPath) {
|
|
|
466
801
|
}
|
|
467
802
|
}
|
|
468
803
|
async function releaseLock(root) {
|
|
469
|
-
const lockPath =
|
|
804
|
+
const lockPath = path6.join(root, LOCK_FILE);
|
|
470
805
|
try {
|
|
471
806
|
await unlink(lockPath);
|
|
472
807
|
} catch {
|
|
@@ -496,6 +831,11 @@ var CONCEPT_EXTRACTION_TOOL = {
|
|
|
496
831
|
is_new: {
|
|
497
832
|
type: "boolean",
|
|
498
833
|
description: "True if this is a new concept not in existing wiki"
|
|
834
|
+
},
|
|
835
|
+
tags: {
|
|
836
|
+
type: "array",
|
|
837
|
+
items: { type: "string" },
|
|
838
|
+
description: "2-4 categorical tags for organizing this concept (e.g., 'machine-learning', 'optimization')"
|
|
499
839
|
}
|
|
500
840
|
},
|
|
501
841
|
required: ["concept", "summary", "is_new"]
|
|
@@ -539,6 +879,12 @@ ${relatedPages}` : "";
|
|
|
539
879
|
"Include a ## Sources section at the end listing the source document.",
|
|
540
880
|
"Suggest [[wikilinks]] to related concepts where appropriate.",
|
|
541
881
|
"Write in a neutral, informative tone. Be concise but thorough.",
|
|
882
|
+
"",
|
|
883
|
+
"Source attribution: at the end of each prose paragraph, append a citation",
|
|
884
|
+
"marker showing which source file(s) the paragraph drew from.",
|
|
885
|
+
"Format: ^[filename.md] for single-source, ^[source-a.md, source-b.md] for multi-source.",
|
|
886
|
+
"Place citations only at the end of prose paragraphs \u2014 not on headings, list items, or code blocks.",
|
|
887
|
+
"Source filenames are visible as `--- SOURCE: filename.md ---` headers in the content below.",
|
|
542
888
|
existingSection,
|
|
543
889
|
relatedSection,
|
|
544
890
|
"\n\n--- SOURCE MATERIAL ---\n\n",
|
|
@@ -550,8 +896,13 @@ function parseConcepts(toolOutput) {
|
|
|
550
896
|
const parsed = JSON.parse(toolOutput);
|
|
551
897
|
const concepts = parsed.concepts ?? [];
|
|
552
898
|
return concepts.filter(
|
|
553
|
-
(c) => typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean"
|
|
554
|
-
)
|
|
899
|
+
(c) => typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean" && (c.tags === void 0 || Array.isArray(c.tags))
|
|
900
|
+
).map((c) => ({
|
|
901
|
+
concept: c.concept,
|
|
902
|
+
summary: c.summary,
|
|
903
|
+
is_new: c.is_new,
|
|
904
|
+
tags: Array.isArray(c.tags) ? c.tags : void 0
|
|
905
|
+
}));
|
|
555
906
|
} catch {
|
|
556
907
|
return [];
|
|
557
908
|
}
|
|
@@ -560,13 +911,13 @@ function parseConcepts(toolOutput) {
|
|
|
560
911
|
// src/compiler/hasher.ts
|
|
561
912
|
import { createHash } from "crypto";
|
|
562
913
|
import { readFile as readFile5, readdir } from "fs/promises";
|
|
563
|
-
import
|
|
914
|
+
import path7 from "path";
|
|
564
915
|
async function hashFile(filePath) {
|
|
565
916
|
const content = await readFile5(filePath, "utf-8");
|
|
566
917
|
return createHash("sha256").update(content).digest("hex");
|
|
567
918
|
}
|
|
568
919
|
async function detectChanges(root, prevState) {
|
|
569
|
-
const sourcesPath =
|
|
920
|
+
const sourcesPath = path7.join(root, SOURCES_DIR);
|
|
570
921
|
const currentFiles = await listSourceFiles(sourcesPath);
|
|
571
922
|
const changes = [];
|
|
572
923
|
for (const file of currentFiles) {
|
|
@@ -586,7 +937,7 @@ async function listSourceFiles(sourcesPath) {
|
|
|
586
937
|
}
|
|
587
938
|
}
|
|
588
939
|
async function classifyFile(root, file, prevState) {
|
|
589
|
-
const filePath =
|
|
940
|
+
const filePath = path7.join(root, SOURCES_DIR, file);
|
|
590
941
|
const hash = await hashFile(filePath);
|
|
591
942
|
const prev = prevState.sources[file];
|
|
592
943
|
if (!prev) return "new";
|
|
@@ -613,28 +964,37 @@ function buildConceptToSourcesMap(sources) {
|
|
|
613
964
|
}
|
|
614
965
|
return conceptMap;
|
|
615
966
|
}
|
|
616
|
-
function
|
|
617
|
-
const
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
const deletedFiles = new Set(
|
|
621
|
-
directChanges.filter((c) => c.status === "deleted").map((c) => c.file)
|
|
967
|
+
function filesByStatus(changes, ...statuses) {
|
|
968
|
+
const statusSet = new Set(statuses);
|
|
969
|
+
return new Set(
|
|
970
|
+
changes.filter((c) => statusSet.has(c.status)).map((c) => c.file)
|
|
622
971
|
);
|
|
972
|
+
}
|
|
973
|
+
function collectSharedContributors(sourceFile, state, conceptMap, excludeSets, out) {
|
|
974
|
+
const sourceEntry = state.sources[sourceFile];
|
|
975
|
+
if (!sourceEntry) return;
|
|
976
|
+
for (const slug of sourceEntry.concepts) {
|
|
977
|
+
const contributors = conceptMap.get(slug);
|
|
978
|
+
if (!contributors || contributors.length < 2) continue;
|
|
979
|
+
for (const contributor of contributors) {
|
|
980
|
+
const isExcluded = excludeSets.some((s) => s.has(contributor));
|
|
981
|
+
if (!isExcluded) out.add(contributor);
|
|
982
|
+
}
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
function findAffectedSources(state, directChanges) {
|
|
986
|
+
const changedFiles = filesByStatus(directChanges, "new", "changed");
|
|
987
|
+
const deletedFiles = filesByStatus(directChanges, "deleted");
|
|
623
988
|
const conceptMap = buildConceptToSourcesMap(state.sources);
|
|
624
989
|
const affected = /* @__PURE__ */ new Set();
|
|
625
990
|
for (const changedFile of changedFiles) {
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
if (!skip) {
|
|
634
|
-
affected.add(contributor);
|
|
635
|
-
}
|
|
636
|
-
}
|
|
637
|
-
}
|
|
991
|
+
collectSharedContributors(
|
|
992
|
+
changedFile,
|
|
993
|
+
state,
|
|
994
|
+
conceptMap,
|
|
995
|
+
[changedFiles, deletedFiles, affected],
|
|
996
|
+
affected
|
|
997
|
+
);
|
|
638
998
|
}
|
|
639
999
|
return Array.from(affected);
|
|
640
1000
|
}
|
|
@@ -676,36 +1036,36 @@ async function persistFrozenSlugs(root, frozenSlugs, successfulExtractions) {
|
|
|
676
1036
|
const stateToSave = { ...currentState, frozenSlugs: Array.from(remaining) };
|
|
677
1037
|
await writeState(root, stateToSave);
|
|
678
1038
|
}
|
|
679
|
-
function
|
|
680
|
-
const compilingFiles = new Set(
|
|
681
|
-
allChanges.filter((c) => c.status === "new" || c.status === "changed").map((c) => c.file)
|
|
682
|
-
);
|
|
683
|
-
const deletedFiles = new Set(
|
|
684
|
-
allChanges.filter((c) => c.status === "deleted").map((c) => c.file)
|
|
685
|
-
);
|
|
686
|
-
const conceptMap = buildConceptToSourcesMap(state.sources);
|
|
1039
|
+
function collectFreshSlugs(extractions, state) {
|
|
687
1040
|
const freshSlugs = /* @__PURE__ */ new Set();
|
|
688
1041
|
for (const result of extractions) {
|
|
689
1042
|
const oldConcepts = new Set(state.sources[result.sourceFile]?.concepts ?? []);
|
|
690
1043
|
for (const c of result.concepts) {
|
|
691
1044
|
const slug = slugify(c.concept);
|
|
692
|
-
if (!oldConcepts.has(slug))
|
|
693
|
-
freshSlugs.add(slug);
|
|
694
|
-
}
|
|
1045
|
+
if (!oldConcepts.has(slug)) freshSlugs.add(slug);
|
|
695
1046
|
}
|
|
696
1047
|
}
|
|
1048
|
+
return freshSlugs;
|
|
1049
|
+
}
|
|
1050
|
+
function findSlugOwners(slugs, conceptMap, excludeSets) {
|
|
697
1051
|
const affected = /* @__PURE__ */ new Set();
|
|
698
|
-
for (const slug of
|
|
1052
|
+
for (const slug of slugs) {
|
|
699
1053
|
const owners = conceptMap.get(slug);
|
|
700
1054
|
if (!owners) continue;
|
|
701
1055
|
for (const owner of owners) {
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
}
|
|
1056
|
+
const isExcluded = excludeSets.some((s) => s.has(owner));
|
|
1057
|
+
if (!isExcluded) affected.add(owner);
|
|
705
1058
|
}
|
|
706
1059
|
}
|
|
707
1060
|
return Array.from(affected);
|
|
708
1061
|
}
|
|
1062
|
+
function findLateAffectedSources(extractions, state, allChanges) {
|
|
1063
|
+
const compilingFiles = filesByStatus(allChanges, "new", "changed");
|
|
1064
|
+
const deletedFiles = filesByStatus(allChanges, "deleted");
|
|
1065
|
+
const conceptMap = buildConceptToSourcesMap(state.sources);
|
|
1066
|
+
const freshSlugs = collectFreshSlugs(extractions, state);
|
|
1067
|
+
return findSlugOwners(freshSlugs, conceptMap, [compilingFiles, deletedFiles]);
|
|
1068
|
+
}
|
|
709
1069
|
function findSharedConcepts(sourceFile, state) {
|
|
710
1070
|
const shared = /* @__PURE__ */ new Set();
|
|
711
1071
|
const sourceEntry = state.sources[sourceFile];
|
|
@@ -735,7 +1095,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
|
|
|
735
1095
|
}
|
|
736
1096
|
|
|
737
1097
|
// src/compiler/orphan.ts
|
|
738
|
-
import
|
|
1098
|
+
import path8 from "path";
|
|
739
1099
|
async function markOrphaned(root, sourceFile, state) {
|
|
740
1100
|
const sourceEntry = state.sources[sourceFile];
|
|
741
1101
|
if (!sourceEntry) return;
|
|
@@ -761,7 +1121,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
|
|
|
761
1121
|
}
|
|
762
1122
|
}
|
|
763
1123
|
async function orphanPage(root, slug, reason) {
|
|
764
|
-
const pagePath =
|
|
1124
|
+
const pagePath = path8.join(root, CONCEPTS_DIR, `${slug}.md`);
|
|
765
1125
|
const content = await safeReadFile(pagePath);
|
|
766
1126
|
if (!content) return;
|
|
767
1127
|
const { meta } = parseFrontmatter(content);
|
|
@@ -773,16 +1133,16 @@ async function orphanPage(root, slug, reason) {
|
|
|
773
1133
|
|
|
774
1134
|
// src/compiler/resolver.ts
|
|
775
1135
|
import { readdir as readdir2, readFile as readFile6 } from "fs/promises";
|
|
776
|
-
import
|
|
1136
|
+
import path9 from "path";
|
|
777
1137
|
import { existsSync as existsSync2 } from "fs";
|
|
778
1138
|
async function buildTitleIndex(root) {
|
|
779
|
-
const conceptsDir =
|
|
1139
|
+
const conceptsDir = path9.join(root, CONCEPTS_DIR);
|
|
780
1140
|
if (!existsSync2(conceptsDir)) return [];
|
|
781
1141
|
const files = await readdir2(conceptsDir);
|
|
782
1142
|
const pages = [];
|
|
783
1143
|
for (const file of files) {
|
|
784
1144
|
if (!file.endsWith(".md")) continue;
|
|
785
|
-
const filePath =
|
|
1145
|
+
const filePath = path9.join(conceptsDir, file);
|
|
786
1146
|
const content = await readFile6(filePath, "utf-8");
|
|
787
1147
|
const { meta } = parseFrontmatter(content);
|
|
788
1148
|
if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
|
|
@@ -802,25 +1162,41 @@ function isInsideWikilink(text, position) {
|
|
|
802
1162
|
const closeBefore = text.indexOf("]]", before);
|
|
803
1163
|
return closeBefore >= position;
|
|
804
1164
|
}
|
|
1165
|
+
/**
 * Tells whether `position` falls inside a `^[...]` citation marker.
 *
 * @param {string} text - Text being scanned.
 * @param {number} position - Index to test.
 * @returns {boolean} True when the nearest preceding "^[" is still unclosed at `position`.
 */
function isInsideCitation(text, position) {
  const openIdx = text.lastIndexOf("^[", position);
  if (openIdx === -1) return false;
  // No "]" at or after the position means nothing can enclose it.
  if (text.indexOf("]", position) === -1) return false;
  // The first "]" after the opener must not come before the position.
  return text.indexOf("]", openIdx) >= position;
}
|
|
805
1172
|
/**
 * Checks that the span [start, end) is delimited on both sides by the text
 * edge or by a whitespace/punctuation character.
 *
 * @param {string} text - Text containing the span.
 * @param {number} start - Index of the first character of the span.
 * @param {number} end - Index just past the last character of the span.
 * @returns {boolean} True when both edges are boundaries.
 */
function isWordBoundary(text, start, end) {
  const delimiter = /[\s,.:;!?()\[\]{}/"']/;
  const leftOk = start === 0 || delimiter.test(text[start - 1]);
  if (!leftOk) return false;
  return end >= text.length || delimiter.test(text[end]);
}
|
|
1177
|
+
/**
 * Locates every case-insensitive, literal occurrence of `title` in `text`.
 *
 * @param {string} text - Text to search.
 * @param {string} title - Title to look for; regex metacharacters are escaped.
 * @returns {Array<{start: number, end: number}>} Match spans in ascending order;
 *   empty when `title` is empty or not a string.
 */
function findTitleMatches(text, title) {
  // An empty pattern matches at every index with zero length; a manual exec()
  // loop on a global regex would never advance and spin forever.
  if (typeof title !== "string" || title.length === 0) return [];
  const escaped = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(escaped, "gi");
  return Array.from(text.matchAll(pattern), (match) => ({
    start: match.index,
    end: match.index + match[0].length
  }));
}
|
|
1187
|
+
/**
 * Decides whether the span [start, end) may be turned into a wikilink: it
 * must not start inside an existing wikilink or citation and must sit on
 * word boundaries.
 *
 * @param {string} text - Text containing the span.
 * @param {number} start - Span start index.
 * @param {number} end - Span end index (exclusive).
 * @returns {boolean} True when the span can be linked.
 */
function isLinkablePosition(text, start, end) {
  const insideExistingMarkup = isInsideWikilink(text, start) || isInsideCitation(text, start);
  return insideExistingMarkup ? false : isWordBoundary(text, start, end);
}
|
|
810
1192
|
function addWikilinks(body, titles, selfTitle) {
|
|
811
1193
|
let result = body;
|
|
1194
|
+
const selfLower = selfTitle.toLowerCase();
|
|
812
1195
|
for (const page of titles) {
|
|
813
|
-
if (page.title.toLowerCase() ===
|
|
814
|
-
const
|
|
815
|
-
const regex = new RegExp(escaped, "gi");
|
|
816
|
-
let match;
|
|
817
|
-
const matches = [];
|
|
818
|
-
while ((match = regex.exec(result)) !== null) {
|
|
819
|
-
matches.push({ start: match.index, end: match.index + match[0].length });
|
|
820
|
-
}
|
|
1196
|
+
if (page.title.toLowerCase() === selfLower) continue;
|
|
1197
|
+
const matches = findTitleMatches(result, page.title);
|
|
821
1198
|
for (const m of matches.reverse()) {
|
|
822
|
-
if (
|
|
823
|
-
if (!isWordBoundary(result, m.start, m.end)) continue;
|
|
1199
|
+
if (!isLinkablePosition(result, m.start, m.end)) continue;
|
|
824
1200
|
result = result.slice(0, m.start) + `[[${page.title}]]` + result.slice(m.end);
|
|
825
1201
|
}
|
|
826
1202
|
}
|
|
@@ -876,41 +1252,43 @@ async function linkPage(page, titleIndex) {
|
|
|
876
1252
|
|
|
877
1253
|
// src/compiler/indexgen.ts
|
|
878
1254
|
import { readdir as readdir3 } from "fs/promises";
|
|
879
|
-
import
|
|
1255
|
+
import path10 from "path";
|
|
880
1256
|
/**
 * Rebuilds the wiki index file from the concept and query page summaries,
 * each list sorted by title, and reports the total page count.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<void>}
 */
async function generateIndex(root) {
  status("*", info("Generating index..."));
  const byTitle = (left, right) => left.title.localeCompare(right.title);
  const conceptPages = await collectPageSummaries(path10.join(root, CONCEPTS_DIR));
  const queryPages = await collectPageSummaries(path10.join(root, QUERIES_DIR));
  conceptPages.sort(byTitle);
  queryPages.sort(byTitle);
  const rendered = buildIndexContent(conceptPages, queryPages);
  await atomicWrite(path10.join(root, INDEX_FILE), rendered);
  const total = conceptPages.length + queryPages.length;
  status("+", success(`Index updated with ${total} pages.`));
}
|
|
894
|
-
async function
|
|
1270
|
+
/**
 * Reads every `.md` file of a directory and returns its slug together with
 * the parsed frontmatter metadata.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Promise<Array<{slug: string, meta: object}>>} One record per
 *   markdown file; an empty list when the directory cannot be listed.
 */
async function scanWikiPages(dirPath) {
  let entries;
  try {
    entries = await readdir3(dirPath);
  } catch {
    // An unreadable or missing directory is treated as containing no pages.
    return [];
  }
  const results = [];
  for (const name of entries) {
    if (!name.endsWith(".md")) continue;
    const raw = await safeReadFile(path10.join(dirPath, name));
    results.push({ slug: name.replace(/\.md$/, ""), meta: parseFrontmatter(raw).meta });
  }
  return results;
}
|
|
1285
|
+
/**
 * Produces title/slug/summary records for the pages of a directory, keeping
 * only pages with a non-empty string title that are not marked orphaned.
 *
 * @param {string} conceptsPath - Directory to scan (passed to scanWikiPages).
 * @returns {Promise<Array<{title: string, slug: string, summary: string}>>}
 */
async function collectPageSummaries(conceptsPath) {
  const summaries = [];
  for (const { slug, meta } of await scanWikiPages(conceptsPath)) {
    const hasTitle = meta.title && typeof meta.title === "string";
    if (!hasTitle || meta.orphaned) continue;
    summaries.push({
      title: meta.title,
      slug,
      // Non-string summaries are normalised to an empty string.
      summary: typeof meta.summary === "string" ? meta.summary : ""
    });
  }
  return summaries;
}
|
|
915
1293
|
function stripWikilinks(text) {
|
|
916
1294
|
return text.replace(/\[\[([^\]]+)\]\]/g, "$1");
|
|
@@ -933,84 +1311,378 @@ function buildIndexContent(concepts, queries) {
|
|
|
933
1311
|
return lines.join("\n");
|
|
934
1312
|
}
|
|
935
1313
|
|
|
1314
|
+
// src/compiler/obsidian.ts
|
|
1315
|
+
import { readdir as readdir4 } from "fs/promises";
|
|
1316
|
+
import path11 from "path";
|
|
1317
|
+
// Titles with fewer words than this get no initials-based alias (see generateAbbreviation).
var ABBREVIATION_MIN_WORDS = 3;
// Space-delimited conjunctions, matched against the lower-cased title, around
// which generateSwapAlias swaps the two halves of a title.
var SWAP_CONJUNCTIONS = [" and ", " or "];
|
|
1319
|
+
/**
 * Adds `tags` and `aliases` fields to a frontmatter object in place.
 *
 * @param {object} frontmatter - Frontmatter fields; mutated.
 * @param {string} conceptTitle - Title from which aliases are derived.
 * @param {Array} tags - Tags stored as-is under `tags`.
 * @returns {void}
 */
function addObsidianMeta(frontmatter, conceptTitle, tags) {
  frontmatter.tags = tags;
  const aliases = generateAliases(conceptTitle);
  frontmatter.aliases = aliases;
}
|
|
1323
|
+
/**
 * Builds the alias list for a concept title: its slug (when different from
 * the title), then a conjunction-swapped variant, then an abbreviation;
 * the last two only when the respective generator yields a value.
 *
 * @param {string} title - Concept title.
 * @returns {string[]} Aliases in the order described above.
 */
function generateAliases(title) {
  const aliases = [];
  const slug = slugify(title);
  if (slug !== title) aliases.push(slug);
  for (const derive of [generateSwapAlias, generateAbbreviation]) {
    const alias = derive(title);
    if (alias) aliases.push(alias);
  }
  return aliases;
}
|
|
1339
|
+
/**
 * Swaps the two halves of a title around the first conjunction from
 * SWAP_CONJUNCTIONS that occurs in it (case-insensitive search, original
 * casing kept), e.g. "Cats and Dogs" becomes "Dogs and Cats".
 *
 * @param {string} title - Concept title.
 * @returns {string|null} The swapped title, or null when no conjunction is found.
 */
function generateSwapAlias(title) {
  const lowered = title.toLowerCase();
  for (const conjunction of SWAP_CONJUNCTIONS) {
    const at = lowered.indexOf(conjunction);
    if (at < 0) continue;
    const cut = at + conjunction.length;
    // tail + conjunction as written in the title + head
    return title.slice(cut) + title.slice(at, cut) + title.slice(0, at);
  }
  return null;
}
|
|
1350
|
+
/**
 * Builds an upper-case abbreviation from the first character of every word
 * in the title.
 *
 * @param {string} title - Concept title.
 * @returns {string|null} The abbreviation, or null when the title has fewer
 *   than ABBREVIATION_MIN_WORDS words or the abbreviation equals the title.
 */
function generateAbbreviation(title) {
  // Leading/trailing whitespace makes split() yield empty strings; drop them
  // so they neither count as words nor crash the first-character lookup.
  const words = title.split(/\s+/).filter((word) => word.length > 0);
  if (words.length < ABBREVIATION_MIN_WORDS) return null;
  // Spread iterates by code point, so a leading astral character stays intact.
  const abbreviation = words.map((word) => [...word][0].toUpperCase()).join("");
  if (abbreviation === title) return null;
  return abbreviation;
}
|
|
1357
|
+
/**
 * Regenerates the map-of-content file: loads the concept pages, groups their
 * titles by tag and writes the rendered markdown to MOC_FILE.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<void>}
 */
async function generateMOC(root) {
  const pages = await loadConceptPages(path11.join(root, CONCEPTS_DIR));
  const markdown = buildMOCContent(groupPagesByTag(pages));
  await atomicWrite(path11.join(root, MOC_FILE), markdown);
}
|
|
1364
|
+
/**
 * Loads title and tags of every readable, non-orphaned `.md` page in a
 * directory.
 *
 * @param {string} conceptsPath - Directory holding the concept pages.
 * @returns {Promise<Array<{title: string, tags: Array}>>} Page records; the
 *   title falls back to the file name without extension, tags to an empty
 *   array. Empty when the directory cannot be listed.
 */
async function loadConceptPages(conceptsPath) {
  let names;
  try {
    names = await readdir4(conceptsPath);
  } catch {
    return [];
  }
  const collected = [];
  for (const name of names.filter((candidate) => candidate.endsWith(".md"))) {
    const raw = await safeReadFile(path11.join(conceptsPath, name));
    if (!raw) continue;
    const { meta } = parseFrontmatter(raw);
    if (meta.orphaned) continue;
    collected.push({
      title: typeof meta.title === "string" ? meta.title : name.replace(/\.md$/, ""),
      tags: Array.isArray(meta.tags) ? meta.tags : []
    });
  }
  return collected;
}
|
|
1384
|
+
/**
 * Groups page titles by tag. Pages without tags are filed under
 * "Uncategorized"; a page with several tags appears in each group.
 *
 * @param {Array<{title: string, tags: Array}>} pages - Pages to group.
 * @returns {Map<*, string[]>} Tag to titles, in encounter order.
 */
function groupPagesByTag(pages) {
  const groups = /* @__PURE__ */ new Map();
  for (const { title, tags } of pages) {
    const keys = tags.length === 0 ? ["Uncategorized"] : tags;
    for (const key of keys) {
      appendToGroup(groups, key, title);
    }
  }
  return groups;
}
|
|
1397
|
+
/**
 * Appends a title to the list stored under `key`, creating the list when the
 * key has no (truthy) value yet.
 *
 * @param {Map<*, string[]>} groups - Map being filled; mutated.
 * @param {*} key - Group key.
 * @param {string} title - Title to add.
 * @returns {void}
 */
function appendToGroup(groups, key, title) {
  const bucket = groups.get(key);
  if (!bucket) {
    groups.set(key, [title]);
    return;
  }
  bucket.push(title);
}
|
|
1405
|
+
/**
 * Renders the map-of-content markdown: one `##` section per tag, tags sorted
 * with localeCompare and "Uncategorized" always last, titles sorted within
 * each section and emitted as wikilinks.
 *
 * @param {Map<*, string[]>} tagGroups - Tag to page titles; not modified.
 * @returns {string} Markdown document.
 */
function buildMOCContent(tagGroups) {
  const lines = ["# Map of Content", ""];
  const sortedTags = [...tagGroups.keys()].sort((a, b) => {
    if (a === "Uncategorized") return 1;
    if (b === "Uncategorized") return -1;
    // Tags come from frontmatter and may not be strings (e.g. a year);
    // coerce so the comparison cannot throw.
    return String(a).localeCompare(String(b));
  });
  for (const tag of sortedTags) {
    const titles = tagGroups.get(tag) ?? [];
    lines.push(`## ${tag}`, "");
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the caller's arrays.
    for (const title of [...titles].sort()) {
      lines.push(`- [[${title}]]`);
    }
    lines.push("");
  }
  return lines.join("\n");
}
|
|
1422
|
+
|
|
1423
|
+
// src/utils/embeddings.ts
|
|
1424
|
+
import { readFile as readFile7, readdir as readdir5 } from "fs/promises";
|
|
1425
|
+
import { existsSync as existsSync3 } from "fs";
|
|
1426
|
+
import path12 from "path";
|
|
1427
|
+
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when the vectors are empty,
 *   differ in length, or either has zero magnitude.
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size === 0 || size !== b.length) return 0;
  let dotProduct = 0;
  let normSqA = 0;
  let normSqB = 0;
  for (let index = 0; index < size; index += 1) {
    const left = a[index];
    const right = b[index];
    dotProduct += left * right;
    normSqA += left * left;
    normSqB += right * right;
  }
  if (normSqA === 0 || normSqB === 0) return 0;
  return dotProduct / (Math.sqrt(normSqA) * Math.sqrt(normSqB));
}
|
|
1440
|
+
/**
 * Returns the `k` store entries most similar to the query vector.
 *
 * @param {number[]} queryVec - Query embedding.
 * @param {{entries: Array<{vector: number[]}>}} store - Embedding store; not modified.
 * @param {number} k - Maximum number of entries to return.
 * @returns {Array} Entries ordered by descending cosine similarity.
 */
function findTopK(queryVec, store, k) {
  return store.entries
    .map((entry) => [cosineSimilarity(queryVec, entry.vector), entry])
    .sort(([scoreA], [scoreB]) => scoreB - scoreA)
    .slice(0, k)
    .map(([, entry]) => entry);
}
|
|
1448
|
+
/**
 * Loads the embedding store from EMBEDDINGS_FILE under `root`.
 *
 * A missing file, unparsable JSON, or a document without an `entries` array
 * all yield null, so callers treat the store as absent (and a later update
 * rebuilds it) instead of failing on every run.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object|null>} The parsed store, or null when unusable.
 * @throws Read errors other than "file not found" are rethrown.
 */
async function readEmbeddingStore(root) {
  const filePath = path12.join(root, EMBEDDINGS_FILE);
  let raw;
  try {
    // Read directly rather than checking existence first: avoids a
    // check-then-read race if the file disappears in between.
    raw = await readFile7(filePath, "utf-8");
  } catch (err) {
    if (err?.code === "ENOENT") return null;
    throw err;
  }
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Corrupt store: deliberately reported as "no store".
    return null;
  }
  const hasEntries = parsed !== null && typeof parsed === "object" && Array.isArray(parsed.entries);
  return hasEntries ? parsed : null;
}
|
|
1454
|
+
/**
 * Serialises the embedding store as two-space-indented JSON and writes it to
 * EMBEDDINGS_FILE under `root` via atomicWrite.
 *
 * @param {string} root - Project root directory.
 * @param {object} store - Store to persist.
 * @returns {Promise<void>}
 */
async function writeEmbeddingStore(root, store) {
  const target = path12.join(root, EMBEDDINGS_FILE);
  const serialized = JSON.stringify(store, null, 2);
  await atomicWrite(target, serialized);
}
|
|
1458
|
+
/**
 * Embeds the question with the active provider and returns slug, title and
 * summary of the EMBEDDING_TOP_K most similar stored pages.
 *
 * @param {string} root - Project root directory.
 * @param {string} question - User question.
 * @returns {Promise<Array<{slug: string, title: string, summary: string}>>}
 *   Empty when there is no store or it has no entries.
 */
async function findRelevantPages(root, question) {
  const store = await readEmbeddingStore(root);
  if (!store || store.entries.length === 0) return [];
  const queryVec = await getProvider().embed(question);
  const nearest = findTopK(queryVec, store, EMBEDDING_TOP_K);
  // Strip vectors and timestamps; callers only need the descriptive fields.
  return nearest.map(({ slug, title, summary }) => ({ slug, title, summary }));
}
|
|
1468
|
+
/**
 * Gathers slug/title/summary records for every non-orphaned page with a
 * string title in the concepts and queries directories.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<Array<{slug: string, title: string, summary: string}>>}
 *   Records in directory order; unreadable directories are skipped.
 */
async function collectPageRecords(root) {
  const isMarkdown = (name) => name.endsWith(".md");
  const records = [];
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
    const absDir = path12.join(root, dir);
    let names;
    try {
      names = await readdir5(absDir);
    } catch {
      continue;
    }
    for (const name of names.filter(isMarkdown)) {
      const raw = await safeReadFile(path12.join(absDir, name));
      const { meta } = parseFrontmatter(raw);
      const usable = !meta.orphaned && typeof meta.title === "string";
      if (!usable) continue;
      records.push({
        slug: name.replace(/\.md$/, ""),
        title: meta.title,
        summary: typeof meta.summary === "string" ? meta.summary : ""
      });
    }
  }
  return records;
}
|
|
1491
|
+
/**
 * Builds the text that gets embedded for a page: the title, followed by a
 * blank line and the summary when a summary is present.
 *
 * @param {{title: string, summary: string}} record - Page record.
 * @returns {string} Text to embed.
 */
function buildEmbeddingText(record) {
  const { title, summary } = record;
  if (!summary) return title;
  return `${title}\n\n${summary}`;
}
|
|
1496
|
+
/**
 * Embeds, one after another, the records whose slug is in `slugsToEmbed`.
 *
 * @param {Array<{slug: string, title: string, summary: string}>} records - Candidate pages.
 * @param {Set<string>} slugsToEmbed - Slugs that need a new vector.
 * @returns {Promise<Array>} New store entries (slug, title, summary, vector,
 *   updatedAt), all sharing one timestamp taken at the start of the call.
 */
async function embedPages(records, slugsToEmbed) {
  const provider = getProvider();
  const stamp = (/* @__PURE__ */ new Date()).toISOString();
  const entries = [];
  for (const { slug, title, summary } of records) {
    if (!slugsToEmbed.has(slug)) continue;
    const vector = await provider.embed(buildEmbeddingText({ slug, title, summary }));
    entries.push({ slug, title, summary, vector, updatedAt: stamp });
  }
  return entries;
}
|
|
1513
|
+
/**
 * Looks up the embedding model for the active provider in EMBEDDING_MODELS,
 * falling back to the anthropic entry when the provider has none.
 *
 * @returns {string} Embedding model name.
 */
function resolveEmbeddingModel() {
  const providerName = getActiveProviderName();
  const configured = EMBEDDING_MODELS[providerName];
  return configured ?? EMBEDDING_MODELS.anthropic;
}
|
|
1516
|
+
/**
 * Merges stored and freshly computed embedding entries by slug. Stored
 * entries whose slug is no longer live are dropped; a fresh entry replaces a
 * stored one with the same slug.
 *
 * @param {Array<{slug: string}>} existing - Entries from the previous store.
 * @param {Array<{slug: string}>} fresh - Newly embedded entries.
 * @param {Set<string>} liveSlugs - Slugs of pages that currently exist.
 * @returns {Array} Merged entries, one per slug.
 */
function mergeEntries(existing, fresh, liveSlugs) {
  const retained = existing.filter((entry) => liveSlugs.has(entry.slug));
  const merged = /* @__PURE__ */ new Map();
  for (const entry of [...retained, ...fresh]) {
    merged.set(entry.slug, entry);
  }
  return [...merged.values()];
}
|
|
1526
|
+
/**
 * Brings the embedding store in line with the pages on disk.
 *
 * Pages named in `changedSlugs` are re-embedded, entries for pages that no
 * longer exist are dropped, and everything is embedded when there is no
 * usable store. A store written with a different embedding model is
 * discarded and rebuilt: its vectors are not comparable with queries
 * embedded by the current model, and keeping them would mislabel them with
 * the new model name.
 *
 * @param {string} root - Project root directory.
 * @param {string[]} changedSlugs - Slugs of pages written in this run.
 * @returns {Promise<void>}
 */
async function updateEmbeddings(root, changedSlugs) {
  const records = await collectPageRecords(root);
  const liveSlugs = new Set(records.map((r) => r.slug));
  const toEmbed = new Set(changedSlugs.filter((slug) => liveSlugs.has(slug)));
  const existingStore = await readEmbeddingStore(root);
  const model = resolveEmbeddingModel();
  const modelChanged = Boolean(existingStore) && existingStore.model !== model;
  const previousEntries = modelChanged ? [] : existingStore?.entries ?? [];
  if (!existingStore || modelChanged) {
    for (const record of records) toEmbed.add(record.slug);
  }
  // Nothing to embed and nothing stale to prune: leave the store untouched.
  // A model change always rewrites so the stale vectors are removed.
  if (!modelChanged && toEmbed.size === 0 && previousEntries.every((e) => liveSlugs.has(e.slug))) {
    return;
  }
  const freshEntries = await embedPages(records, toEmbed);
  const mergedEntries = mergeEntries(previousEntries, freshEntries, liveSlugs);
  const dimensions = mergedEntries[0]?.vector.length ?? 0;
  const store = {
    version: 1,
    model,
    dimensions,
    entries: mergedEntries
  };
  await writeEmbeddingStore(root, store);
  status("*", dim(`Embeddings updated (${mergedEntries.length} pages).`));
}
|
|
1550
|
+
|
|
936
1551
|
// src/compiler/index.ts
|
|
937
1552
|
import pLimit from "p-limit";
|
|
1553
|
+
/**
 * Creates a compile result with all counters at zero and empty lists.
 *
 * @returns {{compiled: number, skipped: number, deleted: number, concepts: Array, pages: Array, errors: Array}}
 *   A fresh object on every call.
 */
function emptyCompileResult() {
  const counters = { compiled: 0, skipped: 0, deleted: 0 };
  return { ...counters, concepts: [], pages: [], errors: [] };
}
|
|
938
1556
|
/**
 * Runs a compile for its side effects only; the result object produced by
 * compileAndReport is discarded.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<void>}
 */
async function compile(root) {
  const ignoredResult = await compileAndReport(root);
  void ignoredResult;
}
|
|
1559
|
+
/**
 * Runs the compile pipeline under the project lock and returns its result.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object>} The pipeline result, or an empty result carrying
 *   a single error message when the lock could not be acquired.
 */
async function compileAndReport(root) {
  header("llmwiki compile");
  const hasLock = await acquireLock(root);
  if (hasLock) {
    try {
      return await runCompilePipeline(root);
    } finally {
      // Release even when the pipeline throws.
      await releaseLock(root);
    }
  }
  status("!", error("Could not acquire lock. Try again later."));
  return {
    ...emptyCompileResult(),
    errors: ["Could not acquire .llmwiki/lock \u2014 another compile is in progress."]
  };
}
|
|
1575
|
+
/**
 * Splits change records by status: "new" and "changed" go to `toCompile`,
 * "deleted" to `deleted`, "unchanged" to `unchanged`. Records with any other
 * status are left out.
 *
 * @param {Array<{status: string}>} changes - Detected changes.
 * @returns {{toCompile: Array, deleted: Array, unchanged: Array}}
 */
function bucketChanges(changes) {
  const toCompile = [];
  const deleted = [];
  const unchanged = [];
  for (const change of changes) {
    switch (change.status) {
      case "new":
      case "changed":
        toCompile.push(change);
        break;
      case "deleted":
        deleted.push(change);
        break;
      case "unchanged":
        unchanged.push(change);
        break;
      default:
        break;
    }
  }
  return { toCompile, deleted, unchanged };
}
|
|
1582
|
+
/**
 * Merges the extractions and generates one page per merged entry, running at
 * most COMPILE_CONCURRENCY generations at a time.
 *
 * @param {string} root - Project root directory.
 * @param {Array} extractions - Extraction results.
 * @param {Set<string>} frozenSlugs - Slugs passed through to mergeExtractions.
 * @returns {Promise<{pages: Array, errors: string[]}>} Every merged entry
 *   (including those whose generation reported an error) and the collected
 *   error messages.
 */
async function generatePagesPhase(root, extractions, frozenSlugs) {
  const mergedEntries = mergeExtractions(extractions, frozenSlugs);
  const throttle = pLimit(COMPILE_CONCURRENCY);
  const errors = [];
  const generateOne = async (entry) => {
    const failure = await generateMergedPage(root, entry);
    if (failure) errors.push(failure);
    return entry;
  };
  const tasks = mergedEntries.map((entry) => throttle(() => generateOne(entry)));
  const pages = await Promise.all(tasks);
  return { pages, errors };
}
|
|
1595
|
+
/**
 * Persists source state for every extraction that yielded at least one
 * concept, one source at a time.
 *
 * @param {string} root - Project root directory.
 * @param {Array<{concepts: Array, sourcePath: string, sourceFile: string}>} extractions
 * @returns {Promise<void>}
 */
async function persistExtractionStates(root, extractions) {
  for (const { concepts, sourcePath, sourceFile } of extractions) {
    const hasConcepts = concepts.length !== 0;
    if (hasConcepts) {
      await persistSourceState(root, sourcePath, sourceFile, concepts);
    }
  }
}
|
|
1601
|
+
/**
 * Prints the end-of-run summary and assembles the compile result.
 *
 * @param {{toCompile: Array, unchanged: Array, deleted: Array}} buckets - Changes by status.
 * @param {{pages: Array, errors: string[]}} generation - Page generation outcome.
 * @param {Array<{concepts: Array, sourceFile: string}>} extractions - Extraction results.
 * @returns {object} Counters, generated concept names and slugs, and all
 *   errors: generation errors followed by one message per extraction that
 *   produced no concepts.
 */
function summarizeCompile(buckets, generation, extractions) {
  header("Compilation complete");
  const compiled = buckets.toCompile.length;
  const skipped = buckets.unchanged.length;
  const deleted = buckets.deleted.length;
  status("\u2713", success(`${compiled} compiled, ${skipped} skipped, ${deleted} deleted`));
  if (compiled > 0) {
    status("\u2192", dim('Next: llmwiki query "your question here"'));
  }
  const emptyExtractionErrors = extractions
    .filter((result) => result.concepts.length === 0)
    .map((result) => `No concepts extracted from ${result.sourceFile}`);
  return {
    compiled,
    skipped,
    deleted,
    concepts: generation.pages.map((entry) => entry.concept.concept),
    pages: generation.pages.map((entry) => entry.slug),
    errors: [...generation.errors, ...emptyExtractionErrors]
  };
}
|
|
951
1624
|
/**
 * Executes one full compile: change detection, orphaning of deleted sources,
 * extraction, page generation, state persistence and wiki finalisation.
 * The steps run strictly in the order written below.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object>} The compile result; when nothing needs work, an
 *   empty result whose `skipped` is the number of unchanged sources.
 */
async function runCompilePipeline(root) {
  const state = await readState(root);
  const changes = await detectChanges(root, state);
  augmentWithAffectedSources(changes, findAffectedSources(state, changes));
  const buckets = bucketChanges(changes);

  const nothingToDo = buckets.toCompile.length === 0 && buckets.deleted.length === 0;
  if (nothingToDo) {
    status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
    return { ...emptyCompileResult(), skipped: buckets.unchanged.length };
  }

  printChangesSummary(changes);
  await markDeletedAsOrphaned(root, buckets.deleted, state);

  const frozenSlugs = findFrozenSlugs(state, changes);
  reportFrozenSlugs(frozenSlugs);

  const extractions = await runExtractionPhases(root, buckets.toCompile, state, changes);
  await freezeFailedExtractions(root, extractions, frozenSlugs);

  const generation = await generatePagesPhase(root, extractions, frozenSlugs);
  await persistExtractionStates(root, extractions);
  if (frozenSlugs.size > 0) {
    await orphanUnownedFrozenPages(root, frozenSlugs);
  }
  await persistFrozenSlugs(root, frozenSlugs, extractions);

  await finalizeWiki(root, generation.pages);
  return summarizeCompile(buckets, generation, extractions);
}
|
|
1648
|
+
/**
 * Marks the given source files as "changed" in the change list so they are
 * recompiled, logging one line per file.
 *
 * A file already listed as "unchanged" has that entry replaced instead of
 * receiving a second entry; otherwise the same file would be counted both as
 * compiled and as skipped. Files with no "unchanged" entry are appended.
 *
 * @param {Array<{file: string, status: string}>} changes - Change list; mutated.
 * @param {Iterable<string>} affected - Files affected by a shared concept.
 * @returns {void}
 */
function augmentWithAffectedSources(changes, affected) {
  for (const file of affected) {
    status("~", info(`${file} [affected by shared concept]`));
    const position = changes.findIndex(
      (change) => change.file === file && change.status === "unchanged"
    );
    if (position === -1) {
      changes.push({ file, status: "changed" });
    } else {
      // Replace rather than mutate, in case the record object is shared.
      changes[position] = { ...changes[position], status: "changed" };
    }
  }
}
|
|
1654
|
+
/**
 * Calls markOrphaned for every deleted source, one at a time.
 *
 * @param {string} root - Project root directory.
 * @param {Array<{file: string}>} deleted - Change records of deleted sources.
 * @param {object} state - Persisted state handed to markOrphaned.
 * @returns {Promise<void>}
 */
async function markDeletedAsOrphaned(root, deleted, state) {
  for (const { file } of deleted) {
    await markOrphaned(root, file, state);
  }
}
|
|
1659
|
+
/**
 * Logs one informational line per frozen slug.
 *
 * @param {Iterable<string>} frozenSlugs - Slugs to report.
 * @returns {void}
 */
function reportFrozenSlugs(frozenSlugs) {
  const describe = (slug) => dim(`Frozen: ${slug} (shared with deleted source)`);
  for (const slug of frozenSlugs) {
    status("i", describe(slug));
  }
}
|
|
1664
|
+
/**
 * Extracts concepts in two passes: first for every source scheduled for
 * compilation, then for sources reported by findLateAffectedSources on the
 * basis of the first-pass results.
 *
 * @param {string} root - Project root directory.
 * @param {Array<{file: string}>} toCompile - New and changed sources.
 * @param {object} state - Persisted state.
 * @param {Array} allChanges - Every detected change of this run.
 * @returns {Promise<Array>} Extraction results of both passes, in order.
 */
async function runExtractionPhases(root, toCompile, state, allChanges) {
  const results = [];
  for (const { file } of toCompile) {
    results.push(await extractForSource(root, file));
  }
  // Evaluated once, against the first-pass results only.
  const lateFiles = findLateAffectedSources(results, state, allChanges);
  for (const lateFile of lateFiles) {
    status("~", info(`${lateFile} [shares concept with new source]`));
    results.push(await extractForSource(root, lateFile));
  }
  return results;
}
|
|
1676
|
+
/**
 * Post-processes the wiki after page generation: resolves interlinks (only
 * when pages were generated), then regenerates the index and the map of
 * content, then refreshes the embeddings.
 *
 * @param {string} root - Project root directory.
 * @param {Array<{slug: string, concept: {is_new: *}}>} pages - Generated page entries.
 * @returns {Promise<void>}
 */
async function finalizeWiki(root, pages) {
  const changedSlugs = [];
  const newSlugs = [];
  for (const entry of pages) {
    changedSlugs.push(entry.slug);
    if (entry.concept.is_new) newSlugs.push(entry.slug);
  }
  if (changedSlugs.length > 0) {
    status("\u{1F517}", info("Resolving interlinks..."));
    await resolveLinks(root, changedSlugs, newSlugs);
  }
  await generateIndex(root);
  await generateMOC(root);
  await safelyUpdateEmbeddings(root, changedSlugs);
}
|
|
1015
1687
|
function printChangesSummary(changes) {
|
|
1016
1688
|
const iconMap = {
|
|
@@ -1033,9 +1705,9 @@ function printChangesSummary(changes) {
|
|
|
1033
1705
|
}
|
|
1034
1706
|
async function extractForSource(root, sourceFile) {
|
|
1035
1707
|
status("*", info(`Extracting: ${sourceFile}`));
|
|
1036
|
-
const sourcePath =
|
|
1037
|
-
const sourceContent = await
|
|
1038
|
-
const existingIndex = await safeReadFile(
|
|
1708
|
+
const sourcePath = path13.join(root, SOURCES_DIR, sourceFile);
|
|
1709
|
+
const sourceContent = await readFile8(sourcePath, "utf-8");
|
|
1710
|
+
const existingIndex = await safeReadFile(path13.join(root, INDEX_FILE));
|
|
1039
1711
|
const concepts = await extractConcepts(sourceContent, existingIndex);
|
|
1040
1712
|
if (concepts.length > 0) {
|
|
1041
1713
|
const names = concepts.map((c) => c.concept).join(", ");
|
|
@@ -1073,7 +1745,7 @@ ${result.sourceContent}`
|
|
|
1073
1745
|
return Array.from(bySlug.values());
|
|
1074
1746
|
}
|
|
1075
1747
|
async function generateMergedPage(root, entry) {
|
|
1076
|
-
const pagePath =
|
|
1748
|
+
const pagePath = path13.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
|
|
1077
1749
|
const existingPage = await safeReadFile(pagePath);
|
|
1078
1750
|
const relatedPages = await loadRelatedPages(root, entry.slug);
|
|
1079
1751
|
const system = buildPagePrompt(
|
|
@@ -1091,18 +1763,20 @@ async function generateMergedPage(root, entry) {
|
|
|
1091
1763
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
1092
1764
|
const existing = existingPage ? parseFrontmatter(existingPage) : null;
|
|
1093
1765
|
const createdAt = existing?.meta.createdAt && typeof existing.meta.createdAt === "string" ? existing.meta.createdAt : now;
|
|
1094
|
-
const
|
|
1766
|
+
const frontmatterFields = {
|
|
1095
1767
|
title: entry.concept.concept,
|
|
1096
1768
|
summary: entry.concept.summary,
|
|
1097
1769
|
sources: entry.sourceFiles,
|
|
1098
1770
|
createdAt,
|
|
1099
1771
|
updatedAt: now
|
|
1100
|
-
}
|
|
1772
|
+
};
|
|
1773
|
+
addObsidianMeta(frontmatterFields, entry.concept.concept, entry.concept.tags ?? []);
|
|
1774
|
+
const frontmatter = buildFrontmatter(frontmatterFields);
|
|
1101
1775
|
const fullPage = `${frontmatter}
|
|
1102
1776
|
|
|
1103
1777
|
${pageBody}
|
|
1104
1778
|
`;
|
|
1105
|
-
await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
1779
|
+
return await writePageIfValid(pagePath, fullPage, entry.concept.concept);
|
|
1106
1780
|
}
|
|
1107
1781
|
async function extractConcepts(sourceContent, existingIndex) {
|
|
1108
1782
|
const system = buildExtractionPrompt(sourceContent, existingIndex);
|
|
@@ -1114,17 +1788,17 @@ async function extractConcepts(sourceContent, existingIndex) {
|
|
|
1114
1788
|
return parseConcepts(rawOutput);
|
|
1115
1789
|
}
|
|
1116
1790
|
async function loadRelatedPages(root, excludeSlug) {
|
|
1117
|
-
const conceptsPath =
|
|
1791
|
+
const conceptsPath = path13.join(root, CONCEPTS_DIR);
|
|
1118
1792
|
let files;
|
|
1119
1793
|
try {
|
|
1120
|
-
files = await
|
|
1794
|
+
files = await readdir6(conceptsPath);
|
|
1121
1795
|
} catch {
|
|
1122
1796
|
return "";
|
|
1123
1797
|
}
|
|
1124
1798
|
const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, 5);
|
|
1125
1799
|
const contents = [];
|
|
1126
1800
|
for (const f of related) {
|
|
1127
|
-
const content = await safeReadFile(
|
|
1801
|
+
const content = await safeReadFile(path13.join(conceptsPath, f));
|
|
1128
1802
|
if (!content) continue;
|
|
1129
1803
|
const { meta } = parseFrontmatter(content);
|
|
1130
1804
|
if (meta.orphaned) continue;
|
|
@@ -1135,9 +1809,18 @@ async function loadRelatedPages(root, excludeSlug) {
|
|
|
1135
1809
|
/**
 * Writes a page only when it passes validateWikiPage.
 *
 * @param {string} pagePath - Destination file.
 * @param {string} content - Full page content.
 * @param {string} conceptTitle - Title used in messages.
 * @returns {Promise<string|null>} null after a successful write; otherwise an
 *   error message (a warning is logged and nothing is written).
 */
async function writePageIfValid(pagePath, content, conceptTitle) {
  if (validateWikiPage(content)) {
    await atomicWrite(pagePath, content);
    return null;
  }
  status("!", warn(`Invalid page for "${conceptTitle}" \u2014 skipped.`));
  return `Invalid page for "${conceptTitle}" \u2014 failed validation`;
}
|
|
1817
|
+
/**
 * Best-effort wrapper around updateEmbeddings: any failure is logged as a
 * warning and not propagated.
 *
 * @param {string} root - Project root directory.
 * @param {string[]} changedSlugs - Slugs of pages written in this run.
 * @returns {Promise<void>}
 */
async function safelyUpdateEmbeddings(root, changedSlugs) {
  try {
    await updateEmbeddings(root, changedSlugs);
  } catch (failure) {
    let reason;
    if (failure instanceof Error) {
      reason = failure.message;
    } else {
      reason = String(failure);
    }
    status("!", warn(`Skipped embeddings update: ${reason}`));
  }
}
|
|
1142
1825
|
async function persistSourceState(root, sourcePath, sourceFile, concepts) {
|
|
1143
1826
|
const hash = await hashFile(sourcePath);
|
|
@@ -1151,7 +1834,7 @@ async function persistSourceState(root, sourcePath, sourceFile, concepts) {
|
|
|
1151
1834
|
|
|
1152
1835
|
// src/commands/compile.ts
|
|
1153
1836
|
async function compileCommand() {
|
|
1154
|
-
if (!
|
|
1837
|
+
if (!existsSync4(SOURCES_DIR)) {
|
|
1155
1838
|
status(
|
|
1156
1839
|
"!",
|
|
1157
1840
|
warn("No sources found. Run `llmwiki ingest <url>` first.")
|
|
@@ -1162,8 +1845,8 @@ async function compileCommand() {
|
|
|
1162
1845
|
}
|
|
1163
1846
|
|
|
1164
1847
|
// src/commands/query.ts
|
|
1165
|
-
import { existsSync as
|
|
1166
|
-
import
|
|
1848
|
+
import { existsSync as existsSync5 } from "fs";
|
|
1849
|
+
import path14 from "path";
|
|
1167
1850
|
var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
|
|
1168
1851
|
var PAGE_SELECTION_TOOL = {
|
|
1169
1852
|
name: "select_pages",
|
|
@@ -1208,12 +1891,35 @@ ${indexContent}`;
|
|
|
1208
1891
|
return { pages: [], reasoning: "Failed to parse page selection response" };
|
|
1209
1892
|
}
|
|
1210
1893
|
}
|
|
1894
|
+
/**
 * Renders candidate pages as a markdown bullet list, one line per page in the
 * form "- **slug**: title — summary".
 *
 * @param {Array<{slug: string, title: string, summary: string}>} candidates
 * @returns {string} Lines joined with newlines; empty for no candidates.
 */
function buildFilteredIndex(candidates) {
  const lines = [];
  for (const { slug, title, summary } of candidates) {
    lines.push(`- **${slug}**: ${title} \u2014 ${summary}`);
  }
  return lines.join("\n");
}
|
|
1897
|
+
/**
 * Picks the pages to answer a question from. When the semantic pre-filter
 * yields candidates, selection runs over that short list and the selected
 * values are returned unchanged; otherwise selection runs over the full
 * index file and the selected values are passed through slugify.
 *
 * @param {string} root - Project root directory.
 * @param {string} question - User question.
 * @returns {Promise<{pages: string[], rawPages: string[], reasoning: string}>}
 */
async function selectRelevantPages(root, question) {
  const candidates = await tryFindRelevantPages(root, question);
  const usePrefilter = candidates.length > 0;
  if (!usePrefilter) {
    const fullIndex = await safeReadFile(path14.join(root, INDEX_FILE));
    const selection = await selectPages(question, fullIndex);
    return {
      pages: selection.pages.map((p) => slugify(p)),
      rawPages: selection.pages,
      reasoning: selection.reasoning
    };
  }
  const shortlist = buildFilteredIndex(candidates);
  const selection = await selectPages(question, shortlist);
  return { pages: selection.pages, rawPages: selection.pages, reasoning: selection.reasoning };
}
|
|
1908
|
+
/**
 * Best-effort wrapper around findRelevantPages(): any failure is reported
 * through status() and turned into an empty candidate list so the caller can
 * fall back to the full index.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} question - The user's question.
 * @returns {Promise<Array>} Candidates, or [] when the lookup threw.
 */
async function tryFindRelevantPages(root, question) {
  let hits = [];
  try {
    hits = await findRelevantPages(root, question);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    status("!", dim(`Semantic pre-filter unavailable (${reason}); using full index.`));
  }
  return hits;
}
|
|
1211
1917
|
async function loadSelectedPages(root, slugs) {
|
|
1212
1918
|
const sections = [];
|
|
1213
1919
|
for (const slug of slugs) {
|
|
1214
1920
|
let content = "";
|
|
1215
1921
|
for (const dir of PAGE_DIRS) {
|
|
1216
|
-
const candidate = await safeReadFile(
|
|
1922
|
+
const candidate = await safeReadFile(path14.join(root, dir, `${slug}.md`));
|
|
1217
1923
|
if (!candidate) continue;
|
|
1218
1924
|
const { meta } = parseFrontmatter(candidate);
|
|
1219
1925
|
if (meta.orphaned) continue;
|
|
@@ -1229,20 +1935,18 @@ ${content}`);
|
|
|
1229
1935
|
}
|
|
1230
1936
|
return sections.join("\n\n");
|
|
1231
1937
|
}
|
|
1232
|
-
|
|
1233
|
-
|
|
1938
|
+
// System prompt that restricts answers to the supplied wiki content.
var ANSWER_SYSTEM_PROMPT = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";

/**
 * Ask the LLM to answer a question from the given page content.
 *
 * Streaming is requested only when an onToken callback is supplied.
 *
 * @param {string} question - The user's question.
 * @param {string} pagesContent - Concatenated content of the selected pages.
 * @param {Function} [onToken] - Optional callback forwarded to callClaude.
 * @returns {Promise<*>} Whatever callClaude resolves with.
 */
async function callAnswerLLM(question, pagesContent, onToken) {
  const promptLines = [
    `Question: ${question}`,
    "",
    "Relevant wiki pages:",
    `${pagesContent}`
  ];
  const request = {
    system: ANSWER_SYSTEM_PROMPT,
    messages: [{ role: "user", content: promptLines.join("\n") }],
    stream: Boolean(onToken),
    onToken
  };
  return callClaude(request);
}
|
|
1247
1951
|
function summarizeAnswer(answer) {
|
|
1248
1952
|
const firstLine = answer.trim().split(/\n/)[0] ?? "";
|
|
@@ -1251,7 +1955,7 @@ function summarizeAnswer(answer) {
|
|
|
1251
1955
|
}
|
|
1252
1956
|
async function saveQueryPage(root, question, answer) {
|
|
1253
1957
|
const slug = slugify(question);
|
|
1254
|
-
const filePath =
|
|
1958
|
+
const filePath = path14.join(root, QUERIES_DIR, `${slug}.md`);
|
|
1255
1959
|
const frontmatter = buildFrontmatter({
|
|
1256
1960
|
title: question,
|
|
1257
1961
|
summary: summarizeAnswer(answer),
|
|
@@ -1268,27 +1972,52 @@ ${answer}
|
|
|
1268
1972
|
success(`Saved query \u2192 ${source(filePath)}`)
|
|
1269
1973
|
);
|
|
1270
1974
|
await generateIndex(root);
|
|
1975
|
+
try {
|
|
1976
|
+
await updateEmbeddings(root, [slug]);
|
|
1977
|
+
} catch (err) {
|
|
1978
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
1979
|
+
status("!", warn(`Skipped embeddings update: ${message}`));
|
|
1980
|
+
}
|
|
1981
|
+
return slug;
|
|
1982
|
+
}
|
|
1983
|
+
/**
 * Answer a question against the wiki: select pages, load them, call the LLM,
 * and optionally persist the answer.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} question - The user's question.
 * @param {{save?: boolean, onToken?: Function, onPageSelection?: Function}} [options]
 *   onPageSelection(pages, reasoning) fires once pages have been chosen;
 *   onToken is forwarded to the answer call; save persists the answer.
 * @returns {Promise<{answer: string, selectedPages: string[], reasoning: string, saved?: *}>}
 *   `answer` is "" (and `saved` absent) when no page content could be loaded.
 * @throws {Error} When the wiki index file does not exist.
 */
async function generateAnswer(root, question, options = {}) {
  const indexPath = path14.join(root, INDEX_FILE);
  if (!existsSync5(indexPath)) {
    throw new Error("Wiki index not found. Run `llmwiki compile` first.");
  }

  const selection = await selectRelevantPages(root, question);
  const selectedPages = selection.pages;
  const reasoning = selection.reasoning;
  options.onPageSelection?.(selectedPages, reasoning);

  const pagesContent = await loadSelectedPages(root, selectedPages);
  if (!pagesContent) {
    return { answer: "", selectedPages, reasoning };
  }

  const answer = await callAnswerLLM(question, pagesContent, options.onToken);
  const saved = options.save ? await saveQueryPage(root, question, answer) : void 0;
  return { answer, selectedPages, reasoning, saved };
}
|
|
1272
2000
|
async function queryCommand(root, question, options) {
|
|
1273
|
-
if (!
|
|
2001
|
+
if (!existsSync5(path14.join(root, INDEX_FILE))) {
|
|
1274
2002
|
status("!", error("Wiki index not found. Run `llmwiki compile` first."));
|
|
1275
2003
|
return;
|
|
1276
2004
|
}
|
|
1277
2005
|
header("Selecting relevant pages");
|
|
1278
|
-
const
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
2006
|
+
const result = await generateAnswer(root, question, {
|
|
2007
|
+
save: options.save,
|
|
2008
|
+
onToken: (text) => process.stdout.write(text),
|
|
2009
|
+
onPageSelection: (pages, reasoning) => {
|
|
2010
|
+
status("i", dim(`Reasoning: ${reasoning}`));
|
|
2011
|
+
status("*", info(`Selected ${pages.length} page(s): ${pages.join(", ")}`));
|
|
2012
|
+
header("Generating answer");
|
|
2013
|
+
}
|
|
2014
|
+
});
|
|
2015
|
+
process.stdout.write("\n");
|
|
2016
|
+
if (!result.answer) {
|
|
1286
2017
|
status("!", error("No matching pages found. Try refining your question."));
|
|
1287
2018
|
return;
|
|
1288
2019
|
}
|
|
1289
|
-
|
|
1290
|
-
if (options.save) {
|
|
1291
|
-
await saveQueryPage(root, question, answer);
|
|
2020
|
+
if (result.saved) {
|
|
1292
2021
|
status("\u2192", dim("Saved. Future queries will use this answer as context."));
|
|
1293
2022
|
} else {
|
|
1294
2023
|
status("\u2192", dim("Tip: use --save to add this answer to your wiki"));
|
|
@@ -1297,12 +2026,12 @@ async function queryCommand(root, question, options) {
|
|
|
1297
2026
|
|
|
1298
2027
|
// src/commands/watch.ts
|
|
1299
2028
|
import { watch as chokidarWatch } from "chokidar";
|
|
1300
|
-
import { existsSync as
|
|
1301
|
-
import
|
|
2029
|
+
import { existsSync as existsSync6 } from "fs";
|
|
2030
|
+
import path15 from "path";
|
|
1302
2031
|
var DEBOUNCE_MS = 500;
|
|
1303
2032
|
async function watchCommand() {
|
|
1304
|
-
const sourcesPath =
|
|
1305
|
-
if (!
|
|
2033
|
+
const sourcesPath = path15.resolve(SOURCES_DIR);
|
|
2034
|
+
if (!existsSync6(sourcesPath)) {
|
|
1306
2035
|
status(
|
|
1307
2036
|
"!",
|
|
1308
2037
|
warn("No sources/ directory found. Run `llmwiki ingest <url>` first.")
|
|
@@ -1336,7 +2065,7 @@ async function watchCommand() {
|
|
|
1336
2065
|
const scheduleCompile = (eventPath, event) => {
|
|
1337
2066
|
status(
|
|
1338
2067
|
"~",
|
|
1339
|
-
dim(`${event}: ${
|
|
2068
|
+
dim(`${event}: ${path15.basename(eventPath)}`)
|
|
1340
2069
|
);
|
|
1341
2070
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
1342
2071
|
debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
|
|
@@ -1350,6 +2079,609 @@ async function watchCommand() {
|
|
|
1350
2079
|
});
|
|
1351
2080
|
}
|
|
1352
2081
|
|
|
2082
|
+
// src/linter/rules.ts
|
|
2083
|
+
import { readdir as readdir7, readFile as readFile9 } from "fs/promises";
|
|
2084
|
+
import { existsSync as existsSync7 } from "fs";
|
|
2085
|
+
import path16 from "path";
|
|
2086
|
+
var MIN_BODY_LENGTH = 50;
|
|
2087
|
+
var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
|
|
2088
|
+
var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
|
|
2089
|
+
/**
 * Find every match of `pattern` in `content`, line by line.
 *
 * @param {string} content - Text to scan; split on "\n".
 * @param {RegExp} pattern - Pattern whose first capture group is reported.
 * @returns {Array<{captured: string, line: number}>} One entry per match, in
 *   document order, with 1-based line numbers.
 */
function findMatchesInContent(content, pattern) {
  // String.prototype.matchAll throws a TypeError for a RegExp without the
  // global flag, so upgrade such patterns instead of crashing the caller.
  const matcher = pattern instanceof RegExp && !pattern.global
    ? new RegExp(pattern.source, `${pattern.flags}g`)
    : pattern;
  return content.split("\n").flatMap((text, index) =>
    Array.from(text.matchAll(matcher), (match) => ({ captured: match[1], line: index + 1 }))
  );
}
|
|
2100
|
+
/**
 * Read every `*.md` file directly inside a directory.
 *
 * @param {string} dirPath - Directory to read.
 * @returns {Promise<Array<{filePath: string, content: string}>>} One record
 *   per markdown file, or [] when the directory does not exist.
 */
async function readMarkdownFiles(dirPath) {
  if (!existsSync7(dirPath)) return [];
  const names = await readdir7(dirPath);
  const loadOne = async (name) => {
    const filePath = path16.join(dirPath, name);
    return { filePath, content: await readFile9(filePath, "utf-8") };
  };
  // Reads are started together and collected in directory-listing order.
  const pending = [];
  for (const name of names) {
    if (name.endsWith(".md")) pending.push(loadOne(name));
  }
  return Promise.all(pending);
}
|
|
2113
|
+
/**
 * Load all concept pages followed by all query pages under `root`.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<{filePath: string, content: string}>>}
 */
async function collectAllPages(root) {
  let collected = [];
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
    collected = collected.concat(await readMarkdownFiles(path16.join(root, dir)));
  }
  return collected;
}
|
|
2118
|
+
/**
 * Build the set of known page slugs: each page's file name without the
 * `.md` extension, lower-cased.
 *
 * @param {Iterable<{filePath: string}>} pages
 * @returns {Set<string>}
 */
function buildPageSlugSet(pages) {
  return new Set(
    Array.from(pages, ({ filePath }) => path16.basename(filePath, ".md").toLowerCase())
  );
}
|
|
2126
|
+
/**
 * Lint rule: report each `[[wikilink]]` whose slugified target does not match
 * any existing page slug.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<object>>} "broken-wikilink" diagnostics (severity
 *   "error") carrying the file path and 1-based line of each offending link.
 */
async function checkBrokenWikilinks(root) {
  const pages = await collectAllPages(root);
  const knownSlugs = buildPageSlugSet(pages);
  return pages.flatMap((page) =>
    findMatchesInContent(page.content, WIKILINK_PATTERN)
      .filter(({ captured }) => !knownSlugs.has(slugify(captured)))
      .map(({ captured, line }) => ({
        rule: "broken-wikilink",
        severity: "error",
        file: page.filePath,
        message: `Broken wikilink [[${captured}]] \u2014 no matching page found`,
        line
      }))
  );
}
|
|
2146
|
+
/**
 * Lint rule: report each page whose frontmatter has `orphaned` set to
 * exactly `true`.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<object>>} "orphaned-page" warnings.
 */
async function checkOrphanedPages(root) {
  const pages = await collectAllPages(root);
  return pages
    .filter((page) => parseFrontmatter(page.content).meta.orphaned === true)
    .map((page) => ({
      rule: "orphaned-page",
      severity: "warning",
      file: page.filePath,
      message: `Page is marked as orphaned`
    }));
}
|
|
2162
|
+
/**
 * Lint rule: report each page whose frontmatter `summary` is falsy or a
 * whitespace-only string.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<object>>} "missing-summary" warnings.
 */
async function checkMissingSummaries(root) {
  const hasNoSummary = (page) => {
    const { summary } = parseFrontmatter(page.content).meta;
    return typeof summary === "string" ? summary.trim() === "" : !summary;
  };
  const findings = [];
  for (const page of await collectAllPages(root)) {
    if (!hasNoSummary(page)) continue;
    findings.push({
      rule: "missing-summary",
      severity: "warning",
      file: page.filePath,
      message: `Page has no summary in frontmatter`
    });
  }
  return findings;
}
|
|
2180
|
+
/**
 * Lint rule: report pages that share a title (compared lower-cased and
 * trimmed). Every file in a duplicate group gets its own diagnostic listing
 * the other files in that group.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<object>>} "duplicate-concept" errors.
 */
async function checkDuplicateConcepts(root) {
  const filesByTitle = /* @__PURE__ */ new Map();
  for (const { filePath, content } of await collectAllPages(root)) {
    const rawTitle = parseFrontmatter(content).meta.title;
    // Pages with a missing, non-string, or empty title are not compared.
    if (typeof rawTitle !== "string" || rawTitle === "") continue;
    const key = rawTitle.toLowerCase().trim();
    if (filesByTitle.has(key)) {
      filesByTitle.get(key).push(filePath);
    } else {
      filesByTitle.set(key, [filePath]);
    }
  }
  return [...filesByTitle]
    .filter(([, files]) => files.length > 1)
    .flatMap(([title, files]) =>
      files.map((file) => ({
        rule: "duplicate-concept",
        severity: "error",
        file,
        message: `Duplicate title "${title}" \u2014 also in ${files.filter((other) => other !== file).join(", ")}`
      }))
    );
}
|
|
2206
|
+
/**
 * Lint rule: report each titled page whose trimmed body is shorter than
 * MIN_BODY_LENGTH characters. Pages without a non-blank string title are
 * not reported.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<object>>} "empty-page" warnings.
 */
async function checkEmptyPages(root) {
  const pages = await collectAllPages(root);
  return pages
    .filter((page) => {
      const { meta, body } = parseFrontmatter(page.content);
      const tooShort = body.trim().length < MIN_BODY_LENGTH;
      const titled = typeof meta.title === "string" && meta.title.trim() !== "";
      return titled && tooShort;
    })
    .map((page) => ({
      rule: "empty-page",
      severity: "warning",
      file: page.filePath,
      message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
    }));
}
|
|
2224
|
+
/**
 * Lint rule: report each `^[...]` citation whose captured text, resolved
 * against the sources directory, does not exist on disk.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<object>>} "broken-citation" errors with the file
 *   path and 1-based line of each citation.
 */
async function checkBrokenCitations(root) {
  const pages = await collectAllPages(root);
  const sourcesDir = path16.join(root, SOURCES_DIR);
  const isMissing = (cited) => !existsSync7(path16.join(sourcesDir, cited));
  return pages.flatMap((page) =>
    findMatchesInContent(page.content, CITATION_PATTERN)
      .filter(({ captured }) => isMissing(captured))
      .map(({ captured, line }) => ({
        rule: "broken-citation",
        severity: "error",
        file: page.filePath,
        message: `Broken citation ^[${captured}] \u2014 source file not found`,
        line
      }))
  );
}
|
|
2244
|
+
|
|
2245
|
+
// src/linter/index.ts
|
|
2246
|
+
// Every lint rule run by lint(); each entry is called with the wiki root and
// resolves to an array of diagnostics.
var ALL_RULES = [
  checkBrokenWikilinks,
  checkOrphanedPages,
  checkMissingSummaries,
  checkDuplicateConcepts,
  checkEmptyPages,
  checkBrokenCitations
];
|
|
2254
|
+
/**
 * Count the diagnostics that have the given severity.
 *
 * @param {Array<{severity: string}>} results
 * @param {string} severity
 * @returns {number}
 */
function countBySeverity(results, severity) {
  let total = 0;
  for (const entry of results) {
    if (entry.severity === severity) total += 1;
  }
  return total;
}
|
|
2257
|
+
/**
 * Run every rule in ALL_RULES against the wiki and summarise the findings.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<{errors: number, warnings: number, info: number, results: Array<object>}>}
 *   Per-severity counts plus the flattened diagnostics in rule order.
 */
async function lint(root) {
  const perRule = await Promise.all(ALL_RULES.map((rule) => rule(root)));
  const results = perRule.flat();
  const tally = (severity) => countBySeverity(results, severity);
  return {
    errors: tally("error"),
    warnings: tally("warning"),
    info: tally("info"),
    results
  };
}
|
|
2269
|
+
|
|
2270
|
+
// src/commands/lint.ts
|
|
2271
|
+
// Formatter applied to the severity label of each printed diagnostic.
var SEVERITY_FORMATTERS = {
  error: error,
  warning: warn,
  info: info
};

// Status icon shown in front of each printed diagnostic.
var SEVERITY_ICONS = {
  error: "x",
  warning: "!",
  info: "i"
};
|
|
2281
|
+
/**
 * Print one lint diagnostic as `<severity> <file[:line]> <message>` via
 * status(), with the icon and formatter chosen by severity.
 *
 * @param {{severity: string, file: string, line?: number, message: string}} result
 */
function printResult(result) {
  const { severity, file, line, message } = result;
  const paint = SEVERITY_FORMATTERS[severity];
  const icon = SEVERITY_ICONS[severity];
  let location = file;
  if (line) {
    location = `${file}:${line}`;
  }
  status(icon, `${paint(severity)} ${dim(location)} ${message}`);
}
|
|
2287
|
+
/**
 * CLI entry point for `lint`: lint the wiki in the current working
 * directory, print every diagnostic and a totals line, and exit the process
 * with code 1 when at least one error was found.
 */
async function lintCommand() {
  header("Linting wiki");
  const report = await lint(process.cwd());
  report.results.forEach((entry) => printResult(entry));
  console.log();
  const totals = [
    error(`${report.errors} error(s)`),
    warn(`${report.warnings} warning(s)`),
    info(`${report.info} info`)
  ];
  status("*", totals.join(", "));
  if (report.errors > 0) process.exit(1);
}
|
|
2304
|
+
|
|
2305
|
+
// src/mcp/server.ts
|
|
2306
|
+
import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2307
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
2308
|
+
|
|
2309
|
+
// src/mcp/tools.ts
|
|
2310
|
+
import path17 from "path";
|
|
2311
|
+
import { z } from "zod";
|
|
2312
|
+
|
|
2313
|
+
// src/mcp/provider-check.ts
|
|
2314
|
+
// Environment variable holding the API key for each supported provider;
// null means the provider is not checked for a key here.
var PROVIDER_KEY_VARS = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  ollama: null,
  minimax: "MINIMAX_API_KEY"
};

/**
 * Verify that the provider selected by LLMWIKI_PROVIDER (or the default) is
 * supported and has the credentials it needs in the environment.
 *
 * @throws {Error} When the provider is unknown or its credentials are missing.
 */
function ensureProviderAvailable() {
  const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
  if (provider === "anthropic") {
    // Anthropic accepts either an API key or an auth token.
    const auth = resolveAnthropicAuthFromEnv();
    if (!auth.apiKey && !auth.authToken) {
      throw new Error(
        'Anthropic credentials are required for the "anthropic" provider. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN.'
      );
    }
    return;
  }
  // Own-property check: a plain lookup would also resolve inherited keys such
  // as "toString" or "constructor" and skip the unknown-provider error.
  if (!Object.hasOwn(PROVIDER_KEY_VARS, provider)) {
    throw new Error(
      `Unknown provider "${provider}". Supported: ${Object.keys(PROVIDER_KEY_VARS).join(", ")}`
    );
  }
  const keyVar = PROVIDER_KEY_VARS[provider];
  if (keyVar && !process.env[keyVar]) {
    throw new Error(
      `${keyVar} environment variable is required for the "${provider}" provider.`
    );
  }
}
|
|
2343
|
+
|
|
2344
|
+
// src/mcp/tools.ts
|
|
2345
|
+
var PAGE_DIRS2 = [CONCEPTS_DIR, QUERIES_DIR];
|
|
2346
|
+
/**
 * Wrap a payload as a tool result: pretty-printed JSON text content plus the
 * payload itself under `structuredContent.result`.
 *
 * @param {*} payload
 * @returns {{content: Array<{type: string, text: string}>, structuredContent: {result: *}}}
 */
function jsonResult(payload) {
  const text = JSON.stringify(payload, null, 2);
  const textPart = { type: "text", text };
  return {
    content: [textPart],
    structuredContent: { result: payload }
  };
}
|
|
2352
|
+
/**
 * Register every wiki tool on the server, in a fixed order.
 *
 * @param {object} server - Server the tools are registered on.
 * @param {string} root - Wiki root directory passed to each tool.
 */
function registerWikiTools(server, root) {
  const registrars = [
    registerIngestTool,
    registerCompileTool,
    registerQueryTool,
    registerSearchTool,
    registerReadTool,
    registerLintTool,
    registerStatusTool
  ];
  for (const register of registrars) {
    register(server, root);
  }
}
|
|
2361
|
+
/**
 * Register the `ingest_source` tool.
 *
 * The handler temporarily switches the process working directory to `root`
 * around ingestSource() and always restores the previous directory.
 * NOTE(review): process.chdir is process-wide, so overlapping tool calls
 * could observe the changed cwd — confirm calls are not run concurrently.
 *
 * @param {object} server - Server the tool is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerIngestTool(server, root) {
  const config = {
    title: "Ingest Source",
    description: "Fetch a URL or copy a local file into sources/. Returns the saved filename, character count, and whether content was truncated to fit the size limit.",
    inputSchema: {
      source: z.string().describe("URL (http/https) or absolute path to a .md/.txt file")
    }
  };
  const handler = async ({ source: source2 }) => {
    const originalCwd = process.cwd();
    try {
      process.chdir(root);
      return jsonResult(await ingestSource(source2));
    } finally {
      process.chdir(originalCwd);
    }
  };
  server.registerTool("ingest_source", config, handler);
}
|
|
2383
|
+
/**
 * Register the `compile_wiki` tool. The handler checks provider credentials
 * first, then runs compileAndReport(root) and returns its result as JSON.
 *
 * @param {object} server - Server the tool is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerCompileTool(server, root) {
  const config = {
    title: "Compile Wiki",
    description: "Run the incremental compile pipeline: extract concepts from new/changed sources, generate wiki pages, resolve interlinks, and rebuild the index. Requires an LLM provider with credentials.",
    inputSchema: {}
  };
  const handler = async () => {
    ensureProviderAvailable();
    return jsonResult(await compileAndReport(root));
  };
  server.registerTool("compile_wiki", config, handler);
}
|
|
2398
|
+
/**
 * Register the `query_wiki` tool. The handler checks provider credentials,
 * then delegates to generateAnswer() with the optional `save` flag and
 * returns its result as JSON.
 *
 * @param {object} server - Server the tool is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerQueryTool(server, root) {
  const config = {
    title: "Query Wiki",
    description: "Ask a natural-language question. Selects relevant pages with the LLM, loads them, and returns a grounded answer with citations. Set save=true to persist the answer as a wiki page. Requires an LLM provider.",
    inputSchema: {
      question: z.string().describe("The natural-language question to answer."),
      save: z.boolean().optional().describe("Persist the answer as a wiki/queries/ page when true.")
    }
  };
  const handler = async ({ question, save }) => {
    ensureProviderAvailable();
    return jsonResult(await generateAnswer(root, question, { save }));
  };
  server.registerTool("query_wiki", config, handler);
}
|
|
2416
|
+
/**
 * Register the `search_pages` tool. The handler checks provider credentials,
 * picks slugs for the question, loads the matching page records, and returns
 * them as `{ pages }`.
 *
 * @param {object} server - Server the tool is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerSearchTool(server, root) {
  const config = {
    title: "Search Pages",
    description: "Select pages relevant to a question and return their full content. Uses semantic embeddings when available, falling back to LLM-based selection over the wiki index. Requires an LLM provider.",
    inputSchema: {
      question: z.string().describe("The query used to rank pages.")
    }
  };
  const handler = async ({ question }) => {
    ensureProviderAvailable();
    const slugs = await pickSearchSlugs(root, question);
    const pages = await loadPageRecords(root, slugs);
    return jsonResult({ pages });
  };
  server.registerTool("search_pages", config, handler);
}
|
|
2434
|
+
/**
 * Pick the page slugs to return for a search question.
 *
 * Semantic candidates are used when available. Otherwise the LLM selects
 * pages from the full wiki index and each selection is slugified, matching
 * what selectRelevantPages() does for its full-index fallback, so the
 * results can be resolved to `<slug>.md` files.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} question - The search question.
 * @returns {Promise<string[]>} Page slugs.
 */
async function pickSearchSlugs(root, question) {
  try {
    const candidates = await findRelevantPages(root, question);
    if (candidates.length > 0) return candidates.map((c) => c.slug);
  } catch {
    // Deliberate best-effort: when the semantic lookup fails, fall through to
    // LLM selection over the full index.
  }
  const indexContent = await safeReadFile(path17.join(root, INDEX_FILE));
  const { pages } = await selectPages(question, indexContent);
  return pages.map((page) => slugify(page));
}
|
|
2444
|
+
/**
 * Register the `read_page` tool. The handler returns the page found by
 * readPage() as JSON and throws when no page matches the slug.
 *
 * @param {object} server - Server the tool is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerReadTool(server, root) {
  const config = {
    title: "Read Page",
    description: "Read a single wiki page by slug. Searches concepts/ first, then queries/. Returns the parsed frontmatter and body. No LLM call required.",
    inputSchema: {
      slug: z.string().describe("Page slug, without .md extension.")
    }
  };
  const handler = async ({ slug }) => {
    const page = await readPage(root, slug);
    if (page) return jsonResult(page);
    throw new Error(`Page not found: ${slug}`);
  };
  server.registerTool("read_page", config, handler);
}
|
|
2463
|
+
/**
 * Register the `lint_wiki` tool. The handler runs lint(root) and returns the
 * summary as JSON.
 *
 * @param {object} server - Server the tool is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerLintTool(server, root) {
  const config = {
    title: "Lint Wiki",
    description: "Run rule-based quality checks (broken wikilinks, orphans, duplicates, empty pages, broken citations). Returns structured diagnostics. No LLM call.",
    inputSchema: {}
  };
  const handler = async () => jsonResult(await lint(root));
  server.registerTool("lint_wiki", config, handler);
}
|
|
2477
|
+
/**
 * Register the `wiki_status` tool. The handler returns collectStatus(root)
 * as JSON.
 *
 * @param {object} server - Server the tool is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerStatusTool(server, root) {
  const config = {
    title: "Wiki Status",
    description: "Summarize the wiki: page count, source count, last compile time, orphaned pages, and pending source changes. Read-only \u2014 never modifies the workspace.",
    inputSchema: {}
  };
  const handler = async () => {
    const snapshot = await collectStatus(root);
    return jsonResult(snapshot);
  };
  server.registerTool("wiki_status", config, handler);
}
|
|
2488
|
+
/**
 * Gather a status snapshot of the wiki.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object>} Page counts, number of sources in the compile
 *   state, the greatest `compiledAt` value (null when the state has no
 *   sources), orphaned slugs, and every change whose status is not
 *   "unchanged".
 */
async function collectStatus(root) {
  const conceptSummaries = await collectPageSummaries(path17.join(root, CONCEPTS_DIR));
  const querySummaries = await collectPageSummaries(path17.join(root, QUERIES_DIR));
  const state = await readState(root);
  const changes = await detectChanges(root, state);
  const orphanedPages = await findOrphanedSlugs(root);

  // Default (string-order) sort; the last element is reported as latest.
  const stamps = Object.values(state.sources).map((entry) => entry.compiledAt);
  let lastCompiledAt = null;
  if (stamps.length > 0) {
    stamps.sort();
    lastCompiledAt = stamps[stamps.length - 1];
  }

  const pendingChanges = [];
  for (const change of changes) {
    if (change.status !== "unchanged") {
      pendingChanges.push({ file: change.file, status: change.status });
    }
  }

  const conceptCount = conceptSummaries.length;
  const queryCount = querySummaries.length;
  return {
    pages: { concepts: conceptCount, queries: queryCount, total: conceptCount + queryCount },
    sources: Object.keys(state.sources).length,
    lastCompiledAt,
    orphanedPages,
    pendingChanges
  };
}
|
|
2504
|
+
/**
 * List the slugs of concept pages whose frontmatter `orphaned` is truthy.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<string[]>}
 */
async function findOrphanedSlugs(root) {
  const conceptPages = await scanWikiPages(path17.join(root, CONCEPTS_DIR));
  const orphaned = [];
  for (const { meta, slug } of conceptPages) {
    if (meta.orphaned) orphaned.push(slug);
  }
  return orphaned;
}
|
|
2508
|
+
/**
 * Load the page record for each slug, one at a time and in order, dropping
 * slugs for which readPage() finds nothing.
 *
 * @param {Iterable<string>} slugs
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<object>>}
 */
async function loadPageRecords(root, slugs) {
  const found = [];
  for (const slug of slugs) {
    const record = await readPage(root, slug);
    if (!record) continue;
    found.push(record);
  }
  return found;
}
|
|
2516
|
+
/**
 * Read a wiki page by slug, trying each directory in PAGE_DIRS2 in order and
 * skipping pages that are unreadable/empty or marked orphaned.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} slug - Page slug, without the `.md` extension.
 * @returns {Promise<{slug: string, title: string, summary: string, body: string} | null>}
 *   The page record, or null when no usable page exists or the slug is not a
 *   plain file name.
 */
async function readPage(root, slug) {
  // The slug comes from tool input; refuse anything containing a path
  // separator so a value like "../../x" cannot escape the page directories.
  const name = String(slug);
  if (path17.basename(name) !== name) return null;
  for (const dir of PAGE_DIRS2) {
    const content = await safeReadFile(path17.join(root, dir, `${slug}.md`));
    if (!content) continue;
    const { meta, body } = parseFrontmatter(content);
    if (meta.orphaned) continue;
    return {
      slug,
      // Fall back to the slug / empty string when frontmatter lacks a string value.
      title: typeof meta.title === "string" ? meta.title : slug,
      summary: typeof meta.summary === "string" ? meta.summary : "",
      body: body.trim()
    };
  }
  return null;
}
|
|
2531
|
+
|
|
2532
|
+
// src/mcp/resources.ts
|
|
2533
|
+
import path18 from "path";
|
|
2534
|
+
import { readdir as readdir8 } from "fs/promises";
|
|
2535
|
+
import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2536
|
+
/**
 * Build a JSON resource content entry for a URI.
 *
 * @param {{href: string}} uri - URI object; its `href` is reported.
 * @param {*} payload - Value serialised as pretty-printed JSON.
 * @returns {{uri: string, mimeType: string, text: string}}
 */
function jsonContent(uri, payload) {
  const text = JSON.stringify(payload, null, 2);
  return { uri: uri.href, mimeType: "application/json", text };
}
|
|
2543
|
+
/**
 * Build a markdown resource content entry for a URI.
 *
 * @param {{href: string}} uri - URI object; its `href` is reported.
 * @param {string} text - Markdown text, passed through unchanged.
 * @returns {{uri: string, mimeType: string, text: string}}
 */
function markdownContent(uri, text) {
  const entry = { uri: uri.href, mimeType: "text/markdown" };
  entry.text = text;
  return entry;
}
|
|
2550
|
+
/**
 * Register every wiki resource on the server, in a fixed order.
 *
 * @param {object} server - Server the resources are registered on.
 * @param {string} root - Wiki root directory passed to each resource.
 */
function registerWikiResources(server, root) {
  const registrars = [
    registerIndexResource,
    registerSourcesResource,
    registerStateResource,
    registerConceptResource,
    registerQueryResource
  ];
  for (const register of registrars) {
    register(server, root);
  }
}
|
|
2557
|
+
/**
 * Register the `llmwiki://index` resource, which serves the wiki index file
 * as markdown.
 *
 * @param {object} server - Server the resource is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerIndexResource(server, root) {
  const metadata = {
    title: "Wiki Index",
    description: "Full content of wiki/index.md (auto-generated table of contents).",
    mimeType: "text/markdown"
  };
  const read = async (uri) => {
    const indexText = await safeReadFile(path18.join(root, INDEX_FILE));
    return { contents: [markdownContent(uri, indexText)] };
  };
  server.registerResource("wiki-index", "llmwiki://index", metadata, read);
}
|
|
2572
|
+
/**
 * Register the `llmwiki://sources` resource, which serves the result of
 * listSources(root) as JSON.
 *
 * @param {object} server - Server the resource is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerSourcesResource(server, root) {
  const metadata = {
    title: "Wiki Sources",
    description: "List of ingested source files with frontmatter metadata.",
    mimeType: "application/json"
  };
  const read = async (uri) => {
    const sources = await listSources(root);
    return { contents: [jsonContent(uri, sources)] };
  };
  server.registerResource("wiki-sources", "llmwiki://sources", metadata, read);
}
|
|
2586
|
+
/**
 * Register the `llmwiki://state` resource, which serves the result of
 * readState(root) as JSON.
 *
 * @param {object} server - Server the resource is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerStateResource(server, root) {
  const metadata = {
    title: "Compilation State",
    description: "Per-source hashes, concepts, and last compile times from .llmwiki/state.json.",
    mimeType: "application/json"
  };
  const read = async (uri) => ({
    contents: [jsonContent(uri, await readState(root))]
  });
  server.registerResource("wiki-state", "llmwiki://state", metadata, read);
}
|
|
2601
|
+
/**
 * Register the templated `llmwiki://concept/{slug}` resource. Listing
 * enumerates the pages under the concepts directory; reading returns one
 * page's frontmatter and body as JSON.
 *
 * @param {object} server - Server the resource is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerConceptResource(server, root) {
  const template = new ResourceTemplate("llmwiki://concept/{slug}", {
    list: async () => listPagesUnder(root, CONCEPTS_DIR, "concept")
  });
  const metadata = {
    title: "Wiki Concept",
    description: "A single concept page from wiki/concepts/ \u2014 frontmatter plus body.",
    mimeType: "application/json"
  };
  const read = async (uri, { slug }) => {
    const page = await loadPageWithMeta(root, CONCEPTS_DIR, String(slug));
    return { contents: [jsonContent(uri, page)] };
  };
  server.registerResource("wiki-concept", template, metadata, read);
}
|
|
2617
|
+
/**
 * Register the templated `llmwiki://query/{slug}` resource. Listing
 * enumerates the pages under the queries directory; reading returns one
 * saved query page's frontmatter and body as JSON.
 *
 * @param {object} server - Server the resource is registered on.
 * @param {string} root - Wiki root directory.
 */
function registerQueryResource(server, root) {
  const template = new ResourceTemplate("llmwiki://query/{slug}", {
    list: async () => listPagesUnder(root, QUERIES_DIR, "query")
  });
  const metadata = {
    title: "Wiki Query",
    description: "A single saved query page from wiki/queries/ \u2014 frontmatter plus body.",
    mimeType: "application/json"
  };
  const read = async (uri, { slug }) => {
    const page = await loadPageWithMeta(root, QUERIES_DIR, String(slug));
    return { contents: [jsonContent(uri, page)] };
  };
  server.registerResource("wiki-query", template, metadata, read);
}
|
|
2633
|
+
/**
 * List the markdown files in the sources directory together with their
 * frontmatter fields.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<object>>} One `{ filename, ...frontmatter }` record
 *   per `*.md` file, or [] when the directory cannot be read.
 */
async function listSources(root) {
  const sourcesPath = path18.join(root, SOURCES_DIR);
  let entries;
  try {
    entries = await readdir8(sourcesPath);
  } catch {
    return [];
  }
  const records = [];
  for (const name of entries) {
    if (!name.endsWith(".md")) continue;
    const raw = await safeReadFile(path18.join(sourcesPath, name));
    const { meta } = parseFrontmatter(raw);
    records.push({ filename: name, ...meta });
  }
  return records;
}
|
|
2649
|
+
/**
 * Load one page from a specific wiki directory with its full frontmatter.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} dir - Page directory relative to `root`.
 * @param {string} slug - Page slug, without the `.md` extension.
 * @returns {Promise<{slug: string, meta: object, body: string}>}
 * @throws {Error} When the slug is not a plain file name, or the page is
 *   missing or empty.
 */
async function loadPageWithMeta(root, dir, slug) {
  // The slug comes from a client-supplied resource URI; refuse anything with
  // a path separator so it cannot resolve outside `dir`.
  const name = String(slug);
  if (path18.basename(name) !== name) {
    throw new Error(`Invalid page slug: ${slug}`);
  }
  const filePath = path18.join(root, dir, `${slug}.md`);
  const content = await safeReadFile(filePath);
  if (!content) {
    throw new Error(`Page not found: ${dir}/${slug}.md`);
  }
  const { meta, body } = parseFrontmatter(content);
  return { slug, meta, body: body.trim() };
}
|
|
2658
|
+
/**
 * Enumerate the markdown pages in a wiki directory as resource descriptors.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} dir - Page directory relative to `root`.
 * @param {string} scheme - URI path segment placed before the slug.
 * @returns {Promise<{resources: Array<{uri: string, name: string}>}>}
 *   Empty when the directory cannot be read.
 */
async function listPagesUnder(root, dir, scheme) {
  let entries;
  try {
    entries = await readdir8(path18.join(root, dir));
  } catch {
    return { resources: [] };
  }
  const resources = [];
  for (const fileName of entries) {
    if (!fileName.endsWith(".md")) continue;
    const slug = fileName.replace(/\.md$/, "");
    resources.push({ uri: `llmwiki://${scheme}/${slug}`, name: slug });
  }
  return { resources };
}
|
|
2672
|
+
|
|
2673
|
+
// src/mcp/server.ts
|
|
2674
|
+
/**
 * Create the llmwiki MCP server, register its tools and resources, and
 * connect it over a stdio transport.
 *
 * @param {{root: string, version: string}} options - Wiki root directory and
 *   the version string reported by the server.
 * @returns {Promise<void>} Resolves once server.connect() has resolved.
 */
async function startMCPServer(options) {
  const { root, version: version2 } = options;
  const serverInfo = { name: "llmwiki", version: version2 };
  const serverOptions = {
    instructions: "llmwiki is a knowledge compiler. Use ingest_source to add raw sources, compile_wiki to run the LLM pipeline, query_wiki for grounded answers, and search_pages to retrieve relevant pages. read_page, lint_wiki, and wiki_status work without an API key."
  };
  const server = new McpServer2(serverInfo, serverOptions);
  registerWikiTools(server, root);
  registerWikiResources(server, root);
  await server.connect(new StdioServerTransport());
}
|
|
2684
|
+
|
|
1353
2685
|
// src/cli.ts
|
|
1354
2686
|
var require2 = createRequire(import.meta.url);
|
|
1355
2687
|
var { version } = require2("../package.json");
|
|
@@ -1364,8 +2696,8 @@ program.command("ingest <source>").description("Ingest a URL or local file into
|
|
|
1364
2696
|
}
|
|
1365
2697
|
});
|
|
1366
2698
|
program.command("compile").description("Compile sources/ into an interlinked wiki").action(async () => {
|
|
1367
|
-
requireApiKey();
|
|
1368
2699
|
try {
|
|
2700
|
+
requireProvider();
|
|
1369
2701
|
await compileCommand();
|
|
1370
2702
|
} catch (err) {
|
|
1371
2703
|
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
@@ -1373,8 +2705,8 @@ program.command("compile").description("Compile sources/ into an interlinked wik
|
|
|
1373
2705
|
}
|
|
1374
2706
|
});
|
|
1375
2707
|
program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").action(async (question, options) => {
|
|
1376
|
-
requireApiKey();
|
|
1377
2708
|
try {
|
|
2709
|
+
requireProvider();
|
|
1378
2710
|
await queryCommand(process.cwd(), question, options);
|
|
1379
2711
|
} catch (err) {
|
|
1380
2712
|
console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
|
|
@@ -1382,21 +2714,64 @@ program.command("query <question>").description("Ask a question against the wiki
|
|
|
1382
2714
|
}
|
|
1383
2715
|
});
|
|
1384
2716
|
// `watch`: verify provider credentials, then run the watcher. Any thrown
// error is printed in red and the process exits with status 1.
program
  .command("watch")
  .description("Watch sources/ and auto-recompile on changes")
  .action(async () => {
    try {
      requireProvider();
      await watchCommand();
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
1393
|
-
program.
|
|
1394
|
-
|
|
1395
|
-
|
|
2725
|
+
// `lint`: run the lint command; no provider check is performed here.
// Any thrown error is printed in red and the process exits with status 1.
program
  .command("lint")
  .description("Run rule-based quality checks against the wiki")
  .action(async () => {
    try {
      await lintCommand();
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
2733
|
+
// `serve`: start the MCP server over stdio. `--root` defaults to the current
// working directory at registration time. Any thrown error is printed in red
// and the process exits with status 1.
program
  .command("serve")
  .description("Start an MCP server exposing wiki tools and resources over stdio")
  .option("--root <dir>", "Project root directory", process.cwd())
  .action(async (options) => {
    try {
      const serverOptions = { root: options.root, version };
      await startMCPServer(serverOptions);
    } catch (error) {
      const detail = error instanceof Error ? error.message : error;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
|
|
2741
|
+
// Environment variable that must hold the API key for each supported
// provider. `null` marks a provider for which no key is checked.
var PROVIDER_KEY_VARS2 = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  ollama: null,
  minimax: "MINIMAX_API_KEY"
};

/**
 * Checks that the selected provider is known and has its credentials set.
 *
 * The provider is read from `LLMWIKI_PROVIDER`, falling back to
 * `DEFAULT_PROVIDER` when the variable is unset. On any failure an error is
 * printed to stderr and the process exits with status 1.
 *
 * @returns {void}
 */
function requireProvider() {
  const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
  if (provider === "anthropic") {
    // Anthropic accepts either an API key or an auth token.
    const auth = resolveAnthropicAuthFromEnv();
    if (!auth.apiKey && !auth.authToken) {
      console.error(
        `\x1B[31mError:\x1B[0m Anthropic credentials are required for the "anthropic" provider.
Set one of: export ANTHROPIC_API_KEY=<your-key> OR export ANTHROPIC_AUTH_TOKEN=<your-token>`
      );
      process.exit(1);
    }
    return;
  }
  // Own-property check: a plain lookup would also resolve names inherited
  // from Object.prototype ("constructor", "toString", ...) and treat them as
  // known providers.
  if (!Object.prototype.hasOwnProperty.call(PROVIDER_KEY_VARS2, provider)) {
    console.error(
      `\x1B[31mError:\x1B[0m Unknown provider "${provider}".
Supported: ${Object.keys(PROVIDER_KEY_VARS2).join(", ")}`
    );
    process.exit(1);
    return;
  }
  const keyVar = PROVIDER_KEY_VARS2[provider];
  if (keyVar && !process.env[keyVar]) {
    console.error(
      `\x1B[31mError:\x1B[0m ${keyVar} environment variable is required for the "${provider}" provider.
Set it with: export ${keyVar}=<your-key>`
    );
    process.exit(1);
  }
}
|
|
2776
|
+
program.parse();
|
|
1402
2777
|
//# sourceMappingURL=cli.js.map
|