llm-wiki-compiler 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -13,6 +13,12 @@ import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
13
13
  import { writeFile, rename, readFile, mkdir } from "fs/promises";
14
14
  import path from "path";
15
15
  import yaml from "js-yaml";
16
/** Provenance labels that parseProvenanceState accepts; any other value is treated as absent. */
var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set(
  ["extracted", "merged", "inferred", "ambiguous"]
);
16
22
  function slugify(title) {
17
23
  return title.toLowerCase().replace(/['']/g, "").replace(/[^\w\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
18
24
  }
@@ -50,6 +56,46 @@ async function safeReadFile(filePath) {
50
56
  return "";
51
57
  }
52
58
  }
59
/**
 * Validate a raw confidence value and clamp it into the range [0, 1].
 *
 * @param {unknown} raw - Value read from page metadata.
 * @returns {number | undefined} The clamped number, or undefined when `raw`
 *   is not a finite number.
 */
function parseConfidence(raw) {
  const isFiniteNumber = typeof raw === "number" && Number.isFinite(raw);
  if (!isFiniteNumber) return undefined;
  // Explicit comparisons (rather than Math.min/max) keep -0 untouched.
  return raw < 0 ? 0 : raw > 1 ? 1 : raw;
}
65
/**
 * Accept a provenance state only when it is one of VALID_PROVENANCE_STATES.
 *
 * @param {unknown} raw - Value read from page metadata.
 * @returns {string | undefined} `raw` unchanged when recognised, otherwise undefined.
 */
function parseProvenanceState(raw) {
  const isRecognised = typeof raw === "string" && VALID_PROVENANCE_STATES.has(raw);
  return isRecognised ? raw : undefined;
}
69
/**
 * Normalise one `contradictedBy` entry into a `{ slug, reason? }` reference.
 *
 * Accepts either a bare slug string or an object carrying a `slug` property.
 * The slug is trimmed; `reason` is copied only when it is a string.
 *
 * @param {unknown} entry - Raw entry from page metadata.
 * @returns {{ slug: string, reason?: string } | null} The reference, or null
 *   when the entry has no usable slug.
 */
function coerceContradictionEntry(entry) {
  if (typeof entry === "string") {
    const slug = entry.trim();
    return slug.length > 0 ? { slug } : null;
  }
  const hasSlugProperty = Boolean(entry) && typeof entry === "object" && "slug" in entry;
  if (!hasSlugProperty) return null;

  const rawSlug = entry.slug;
  if (typeof rawSlug !== "string") return null;
  const slug = rawSlug.trim();
  if (slug.length === 0) return null;

  return typeof entry.reason === "string" ? { slug, reason: entry.reason } : { slug };
}
82
/**
 * Parse a raw `contradictedBy` list into normalised references.
 *
 * @param {unknown} raw - Value read from page metadata.
 * @returns {Array<{ slug: string, reason?: string }> | undefined} The usable
 *   references, or undefined when `raw` is not an array or none are usable.
 */
function parseContradictedBy(raw) {
  if (!Array.isArray(raw)) return undefined;
  const refs = [];
  for (const entry of raw) {
    const ref = coerceContradictionEntry(entry);
    if (ref !== null) refs.push(ref);
  }
  return refs.length === 0 ? undefined : refs;
}
87
/**
 * Accept an inferred-paragraph count only when it is a non-negative integer.
 *
 * @param {unknown} raw - Value read from page metadata.
 * @returns {number | undefined} `raw` unchanged when valid, otherwise undefined.
 */
function parseInferredParagraphs(raw) {
  // Number.isInteger is false for every non-number, so no separate typeof check is needed.
  const isCount = Number.isInteger(raw) && raw >= 0;
  return isCount ? raw : undefined;
}
91
/**
 * Extract the provenance fields from a metadata object, validating each one.
 * Fields that fail validation come back as undefined.
 *
 * @param {object} meta - Metadata object to read the four provenance fields from.
 * @returns {{ confidence: (number|undefined), provenanceState: (string|undefined),
 *   contradictedBy: (Array|undefined), inferredParagraphs: (number|undefined) }}
 */
function parseProvenanceMetadata(meta) {
  const { confidence, provenanceState, contradictedBy, inferredParagraphs } = meta;
  return {
    confidence: parseConfidence(confidence),
    provenanceState: parseProvenanceState(provenanceState),
    contradictedBy: parseContradictedBy(contradictedBy),
    inferredParagraphs: parseInferredParagraphs(inferredParagraphs)
  };
}
53
99
  function validateWikiPage(content) {
54
100
  if (!content || content.trim().length === 0) return false;
55
101
  const { meta, body } = parseFrontmatter(content);
@@ -66,7 +112,14 @@ var COMPILE_CONCURRENCY = 5;
66
112
  var RETRY_COUNT = 3;
67
113
  var RETRY_BASE_MS = 1e3;
68
114
  var RETRY_MULTIPLIER = 4;
69
- var MODEL = "claude-sonnet-4-20250514";
115
/** Provider selected when LLMWIKI_PROVIDER is not set. */
var DEFAULT_PROVIDER = "anthropic";

/** Chat model used for each provider when no model override is configured. */
var PROVIDER_MODELS = {
  anthropic: "claude-sonnet-4-20250514",
  openai: "gpt-4o",
  ollama: "llama3.1",
  minimax: "MiniMax-M2.7"
};

/** Base URL used for the Ollama provider when OLLAMA_HOST is not set. */
var OLLAMA_DEFAULT_HOST = "http://localhost:11434/v1";
70
123
  var SOURCES_DIR = "sources";
71
124
  var CONCEPTS_DIR = "wiki/concepts";
72
125
  var QUERIES_DIR = "wiki/queries";
@@ -74,6 +127,18 @@ var LLMWIKI_DIR = ".llmwiki";
74
127
  var STATE_FILE = ".llmwiki/state.json";
75
128
  var LOCK_FILE = ".llmwiki/lock";
76
129
  var INDEX_FILE = "wiki/index.md";
130
// Project-relative locations of generated artefacts.
var MOC_FILE = "wiki/MOC.md";
var EMBEDDINGS_FILE = ".llmwiki/embeddings.json";
var CANDIDATES_DIR = ".llmwiki/candidates";
var CANDIDATES_ARCHIVE_DIR = ".llmwiki/candidates/archive";

// Tunables; the code that consumes them is outside this excerpt.
var EMBEDDING_TOP_K = 15;
var LOW_CONFIDENCE_THRESHOLD = 0.5;
var MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS = 2;

/**
 * Default embedding model per provider.
 * NOTE(review): there is no `minimax` entry, so MiniMaxProvider inherits the
 * OpenAI default through OpenAIProvider.embeddingModel() — confirm that is intended.
 */
var EMBEDDING_MODELS = {
  anthropic: "voyage-3-lite",
  openai: "text-embedding-3-small",
  ollama: "nomic-embed-text"
};
77
142
 
78
143
  // src/utils/output.ts
79
144
  var RESET = "\x1B[0m";
@@ -234,26 +299,36 @@ async function saveSource(title, document) {
234
299
  await writeFile2(destPath, document, "utf-8");
235
300
  return destPath;
236
301
  }
237
- async function ingest(source2) {
302
/**
 * Ingest one source (URL or local file) and save it as a source document.
 *
 * Logs progress, fetches the content with ingestWeb or ingestFile depending
 * on isUrl, passes it through enforceCharLimit and enforceMinContent, builds
 * the document and saves it.
 *
 * @param {string} source2 - URL or file path to ingest.
 * @returns {Promise<{ filename: string, charCount: number, truncated: *, source: string }>}
 *   Summary of what was saved.
 */
async function ingestSource(source2) {
  status("*", info(`Ingesting: ${source2}`));
  const fetched = isUrl(source2) ? await ingestWeb(source2) : await ingestFile(source2);
  const limited = enforceCharLimit(fetched.content);
  enforceMinContent(limited.content);
  const sourceDocument = buildDocument(fetched.title, source2, limited);
  const savedPath = await saveSource(fetched.title, sourceDocument);
  return {
    filename: path3.basename(savedPath),
    charCount: limited.content.length,
    truncated: limited.truncated,
    source: source2
  };
}
316
/**
 * CLI entry point for ingestion: ingest the source, then print where it was
 * saved and the suggested next command.
 *
 * @param {string} source2 - URL or file path to ingest.
 * @returns {Promise<void>}
 */
async function ingest(source2) {
  const { filename } = await ingestSource(source2);
  // The displayed path is rebuilt from SOURCES_DIR and the saved file's basename.
  const savedPath = path3.join(SOURCES_DIR, filename);
  const savedMessage = success(`Saved ${bold(filename)} \u2192 ${source(savedPath)}`);
  status("+", savedMessage);
  status("\u2192", dim("Next: llmwiki compile"));
}
250
325
 
251
326
  // src/commands/compile.ts
252
- import { existsSync as existsSync3 } from "fs";
327
+ import { existsSync as existsSync5 } from "fs";
253
328
 
254
329
  // src/compiler/index.ts
255
- import { readFile as readFile7, readdir as readdir4 } from "fs/promises";
256
- import path10 from "path";
330
+ import { readFile as readFile8 } from "fs/promises";
331
+ import path16 from "path";
257
332
 
258
333
  // src/utils/state.ts
259
334
  import { readFile as readFile3, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
@@ -296,30 +371,477 @@ async function removeSourceState(root, sourceFile) {
296
371
  await writeState(root, state);
297
372
  }
298
373
 
299
- // src/utils/llm.ts
374
+ // src/compiler/source-state.ts
375
+ import path6 from "path";
376
+
377
+ // src/compiler/hasher.ts
378
+ import { createHash } from "crypto";
379
+ import { readFile as readFile4, readdir } from "fs/promises";
380
+ import path5 from "path";
381
/**
 * Compute the SHA-256 hex digest of a file's UTF-8 decoded content.
 *
 * @param {string} filePath - Path of the file to hash.
 * @returns {Promise<string>} Lowercase hex digest.
 */
async function hashFile(filePath) {
  const text = await readFile4(filePath, "utf-8");
  const hasher = createHash("sha256");
  hasher.update(text);
  return hasher.digest("hex");
}
385
/**
 * Compare the source files on disk with the previously recorded state.
 *
 * @param {string} root - Project root directory.
 * @param {{ sources: Object<string, { hash: string }> }} prevState - Previous state.
 * @returns {Promise<Array<{ file: string, status: string }>>} One entry per
 *   current source file ("new", "changed" or "unchanged"), followed by one
 *   "deleted" entry per recorded file that no longer exists.
 */
async function detectChanges(root, prevState) {
  const currentFiles = await listSourceFiles(path5.join(root, SOURCES_DIR));
  const changes = [];
  // Files are classified one at a time, in directory-listing order.
  for (const file of currentFiles) {
    changes.push({ file, status: await classifyFile(root, file, prevState) });
  }
  return changes.concat(findDeletedFiles(currentFiles, prevState));
}
397
/**
 * List the `.md` entries of the sources directory.
 *
 * @param {string} sourcesPath - Directory to read.
 * @returns {Promise<string[]>} Entry names ending in ".md"; an empty list
 *   when the directory cannot be read for any reason.
 */
async function listSourceFiles(sourcesPath) {
  let entries;
  try {
    entries = await readdir(sourcesPath);
  } catch {
    // Best effort: an unreadable or missing directory counts as "no sources".
    return [];
  }
  return entries.filter((name) => name.endsWith(".md"));
}
405
/**
 * Classify one source file against the previous state by content hash.
 *
 * @param {string} root - Project root directory.
 * @param {string} file - File name inside the sources directory.
 * @param {{ sources: Object<string, { hash: string }> }} prevState - Previous state.
 * @returns {Promise<"new" | "changed" | "unchanged">}
 */
async function classifyFile(root, file, prevState) {
  const currentHash = await hashFile(path5.join(root, SOURCES_DIR, file));
  const previous = prevState.sources[file];
  if (!previous) return "new";
  return previous.hash === currentHash ? "unchanged" : "changed";
}
413
/**
 * Report files recorded in the previous state that are no longer present.
 *
 * @param {string[]} currentFiles - File names currently on disk.
 * @param {{ sources: Object<string, unknown> }} prevState - Previous state.
 * @returns {Array<{ file: string, status: "deleted" }>}
 */
function findDeletedFiles(currentFiles, prevState) {
  const stillPresent = new Set(currentFiles);
  const deleted = [];
  for (const file of Object.keys(prevState.sources)) {
    if (!stillPresent.has(file)) {
      deleted.push({ file, status: "deleted" });
    }
  }
  return deleted;
}
417
+
418
+ // src/compiler/source-state.ts
419
/**
 * Build a per-source state snapshot for the given extraction results.
 * Results with no concepts are left out; all entries share one timestamp.
 *
 * @param {string} root - Project root directory.
 * @param {Array<{ sourceFile: string, concepts: Array<{ concept: string }> }>} extractions
 * @returns {Promise<Object<string, { hash: string, concepts: string[], compiledAt: string }>>}
 */
async function buildExtractionSourceStates(root, extractions) {
  const compiledAt = new Date().toISOString();
  const snapshot = {};
  for (const extraction of extractions) {
    if (extraction.concepts.length > 0) {
      snapshot[extraction.sourceFile] = await buildEntry(root, extraction, compiledAt);
    }
  }
  return snapshot;
}
428
/**
 * Build the state entry for one extraction result: the source file's current
 * hash, the slugs of its concepts, and the supplied timestamp.
 *
 * @param {string} root - Project root directory.
 * @param {{ sourceFile: string, concepts: Array<{ concept: string }> }} result
 * @param {string} compiledAt - Timestamp recorded on the entry.
 * @returns {Promise<{ hash: string, concepts: string[], compiledAt: string }>}
 */
async function buildEntry(root, result, compiledAt) {
  const hash = await hashFile(path6.join(root, SOURCES_DIR, result.sourceFile));
  const concepts = result.concepts.map(({ concept }) => slugify(concept));
  return { hash, concepts, compiledAt };
}
437
/**
 * Select the state entries that belong to the given source files.
 *
 * Only own properties of `allStates` are considered, so a file name that
 * happens to match an inherited Object.prototype member (e.g. "toString")
 * is never mistaken for a recorded entry.
 *
 * @param {Object<string, object>} allStates - State entries keyed by source file.
 * @param {Iterable<string>} sourceFiles - Files to pick.
 * @returns {Object<string, object>} Subset of `allStates`; files with no
 *   (or a falsy) entry are omitted.
 */
function pickStatesForSources(allStates, sourceFiles) {
  const picked = {};
  for (const file of sourceFiles) {
    if (!Object.hasOwn(allStates, file)) continue;
    const entry = allStates[file];
    if (entry) picked[file] = entry;
  }
  return picked;
}
445
+
446
+ // src/providers/anthropic.ts
300
447
  import Anthropic from "@anthropic-ai/sdk";
301
- var client = null;
302
- function getClient() {
303
- if (!client) {
304
- client = new Anthropic();
448
+ var VOYAGE_EMBEDDINGS_URL = "https://api.voyageai.com/v1/embeddings";
449
/**
 * Build the constructor options for the Anthropic client.
 *
 * Each value is trimmed and included only when non-empty. A single trailing
 * slash is removed from the base URL unless the URL is just "/".
 *
 * @param {{ apiKey?: string, authToken?: string, baseURL?: string }} [options]
 * @returns {{ apiKey?: string, authToken?: string, baseURL?: string }}
 */
function buildAnthropicClientOptions(options = {}) {
  const baseURL = options.baseURL?.trim();
  const apiKey = options.apiKey?.trim();
  const authToken = options.authToken?.trim();

  const clientOptions = {
    ...(apiKey ? { apiKey } : {}),
    ...(authToken ? { authToken } : {})
  };
  if (baseURL) {
    const hasTrailingSlash = baseURL.length > 1 && baseURL.endsWith("/");
    clientOptions.baseURL = hasTrailingSlash ? baseURL.slice(0, -1) : baseURL;
  }
  return clientOptions;
}
467
/**
 * Provider backed by the Anthropic SDK for completions and tool calls, and
 * by the Voyage HTTP API for embeddings.
 */
var AnthropicProvider = class {
  client;
  model;

  /**
   * @param {string} model - Model name sent with every request.
   * @param {{ apiKey?: string, authToken?: string, baseURL?: string }} [options]
   *   Client options, normalised by buildAnthropicClientOptions.
   */
  constructor(model, options = {}) {
    this.model = model;
    this.client = new Anthropic(buildAnthropicClientOptions(options));
  }

  /** Send a single non-streaming completion request and return the first text block ("" if none). */
  async complete(system, messages, maxTokens) {
    const request = {
      model: this.model,
      max_tokens: maxTokens,
      system,
      messages
    };
    const { content } = await this.client.messages.create(request);
    const firstText = content.find((block) => block.type === "text");
    return firstText ? firstText.text : "";
  }

  /** Stream a completion, invoking onToken for each text chunk; resolves with the full text. */
  async stream(system, messages, maxTokens, onToken) {
    const events = this.client.messages.stream({
      model: this.model,
      max_tokens: maxTokens,
      system,
      messages
    });
    let fullText = "";
    for await (const event of events) {
      // Only text deltas contribute to the output; every other event is ignored.
      if (event.type !== "content_block_delta" || event.delta.type !== "text_delta") continue;
      const chunk = event.delta.text;
      fullText += chunk;
      onToken?.(chunk);
    }
    return fullText;
  }

  /**
   * Call the model with tool definitions.
   * Returns the first tool_use input serialised as JSON; when the response has
   * no tool_use block, falls back to the first text block ("" if none).
   */
  async toolCall(system, messages, tools, maxTokens) {
    const anthropicTools = tools.map(({ name, description, input_schema }) => ({
      name,
      description,
      input_schema
    }));
    const { content } = await this.client.messages.create({
      model: this.model,
      max_tokens: maxTokens,
      system,
      messages,
      tools: anthropicTools
    });
    const toolUse = content.find((block) => block.type === "tool_use");
    if (toolUse) {
      return JSON.stringify(toolUse.input);
    }
    const firstText = content.find((block) => block.type === "text");
    return firstText ? firstText.text : "";
  }

  /**
   * Produce a single embedding vector via the Voyage API.
   *
   * Anthropic does not ship a first-party embeddings endpoint, so we delegate
   * to Voyage (their recommended partner). Requires VOYAGE_API_KEY.
   *
   * @throws {Error} When VOYAGE_API_KEY is missing, the request fails, or the
   *   response carries no vector.
   */
  async embed(text) {
    const apiKey = process.env.VOYAGE_API_KEY?.trim();
    if (!apiKey) {
      throw new Error(
        "VOYAGE_API_KEY is not set. Anthropic embeddings use Voyage \u2014 set VOYAGE_API_KEY to enable semantic search."
      );
    }
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`
    };
    const body = JSON.stringify({ input: text, model: EMBEDDING_MODELS.anthropic });
    const response = await fetch(VOYAGE_EMBEDDINGS_URL, { method: "POST", headers, body });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`Voyage embeddings request failed (${response.status}): ${detail}`);
    }
    const payload = await response.json();
    const vector = payload.data?.[0]?.embedding;
    if (!Array.isArray(vector)) {
      throw new Error("Voyage embeddings response did not include a vector.");
    }
    return vector;
  }
};
556
+
557
+ // src/providers/openai.ts
558
+ import OpenAI from "openai";
559
/**
 * Convert a tool definition with `name`, `description` and `input_schema`
 * into the OpenAI `function` tool shape.
 *
 * @param {{ name: string, description: string, input_schema: object }} tool
 * @returns {{ type: "function", function: { name: string, description: string, parameters: object } }}
 */
function translateToolToOpenAI(tool) {
  const { name, description, input_schema: parameters } = tool;
  return {
    type: "function",
    function: { name, description, parameters }
  };
}
569
/**
 * Provider backed by the OpenAI SDK. Also serves as the base class for the
 * Ollama and MiniMax providers in this file.
 */
var OpenAIProvider = class {
  client;
  embeddingsClient;
  model;
  configuredEmbeddingModel;

  /**
   * @param {string} model - Chat model name sent with every request.
   * @param {{ apiKey?: string, baseURL?: string, embeddingsBaseURL?: string, embeddingModel?: string }} [options]
   */
  constructor(model, options = {}) {
    const { apiKey, baseURL, embeddingsBaseURL, embeddingModel } = options;
    this.model = model;
    this.configuredEmbeddingModel = embeddingModel;
    const resolvedKey = apiKey ?? process.env.OPENAI_API_KEY ?? "";
    // NOTE(review): `null` is passed explicitly when no base URL is configured —
    // presumably so the SDK applies its own default; confirm against the SDK.
    this.client = new OpenAI({
      apiKey: resolvedKey,
      baseURL: baseURL ?? null
    });
    // Embeddings get a dedicated client only when a separate base URL is given.
    if (embeddingsBaseURL) {
      this.embeddingsClient = new OpenAI({ apiKey: resolvedKey, baseURL: embeddingsBaseURL });
    } else {
      this.embeddingsClient = this.client;
    }
  }

  /** Send a single non-streaming completion request; returns the first choice's content ("" if none). */
  async complete(system, messages, maxTokens) {
    const conversation = [{ role: "system", content: system }, ...messages];
    const response = await this.client.chat.completions.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: conversation
    });
    return response.choices[0]?.message?.content ?? "";
  }

  /** Stream a completion, invoking onToken for each text chunk; resolves with the full text. */
  async stream(system, messages, maxTokens, onToken) {
    const conversation = [{ role: "system", content: system }, ...messages];
    const chunks = await this.client.chat.completions.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: conversation,
      stream: true
    });
    let fullText = "";
    for await (const chunk of chunks) {
      const delta = chunk.choices[0]?.delta?.content;
      if (!delta) continue;
      fullText += delta;
      onToken?.(delta);
    }
    return fullText;
  }

  /**
   * Call the model with tool definitions.
   * Returns the first tool call's raw `arguments` string; when there is no
   * tool call, falls back to the message content ("" if none).
   */
  async toolCall(system, messages, tools, maxTokens) {
    const conversation = [{ role: "system", content: system }, ...messages];
    const response = await this.client.chat.completions.create({
      model: this.model,
      max_tokens: maxTokens,
      messages: conversation,
      tools: tools.map(translateToolToOpenAI)
    });
    const message = response.choices[0]?.message;
    const toolCalls = message?.tool_calls;
    if (toolCalls && toolCalls.length > 0) {
      return toolCalls[0].function.arguments;
    }
    return message?.content ?? "";
  }

  /**
   * Produce a single embedding vector via the OpenAI embeddings API.
   * Subclasses (e.g. Ollama) override embeddingModel() to pick a different model.
   *
   * @throws {Error} When the response carries no vector.
   */
  async embed(text) {
    const request = { model: this.embeddingModel(), input: text };
    const response = await this.embeddingsClient.embeddings.create(request);
    const vector = response.data[0]?.embedding;
    if (!Array.isArray(vector)) {
      throw new Error("OpenAI embeddings response did not include a vector.");
    }
    return vector;
  }

  /** Embedding model for this provider: the configured one, else the OpenAI default. Subclasses may override. */
  embeddingModel() {
    return this.configuredEmbeddingModel ?? EMBEDDING_MODELS.openai;
  }
};
646
+
647
+ // src/providers/ollama.ts
648
/**
 * Ollama provider: an OpenAIProvider pointed at an Ollama base URL, using the
 * fixed API key "ollama" and Ollama's default embedding model.
 */
var OllamaProvider = class extends OpenAIProvider {
  /**
   * @param {string} model - Chat model name.
   * @param {{ baseURL: string, embeddingsBaseURL?: string, embeddingModel?: string }} options
   */
  constructor(model, options) {
    const { baseURL, embeddingsBaseURL, embeddingModel } = options;
    super(model, {
      baseURL,
      apiKey: "ollama",
      embeddingsBaseURL,
      embeddingModel
    });
  }

  /** Ollama ships a dedicated embedding model (nomic-embed-text). */
  embeddingModel() {
    return this.configuredEmbeddingModel ?? EMBEDDING_MODELS.ollama;
  }
};
662
+
663
+ // src/providers/minimax.ts
664
+ var MINIMAX_BASE_URL = "https://api.minimax.io/v1";
665
/** MiniMax provider: an OpenAIProvider pointed at MINIMAX_BASE_URL. */
var MiniMaxProvider = class extends OpenAIProvider {
  /**
   * @param {string} model - Chat model name.
   * @param {string} apiKey - MiniMax API key.
   */
  constructor(model, apiKey) {
    const options = { baseURL: MINIMAX_BASE_URL, apiKey };
    super(model, options);
  }
};
670
+
671
+ // src/utils/claude-settings.ts
672
+ import { readFileSync } from "fs";
673
+ import { homedir } from "os";
674
+ import path7 from "path";
675
+ var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
676
/**
 * Tell whether a value is a non-null object (arrays included).
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isRecord(value) {
  // typeof null is "object", so null is excluded explicitly.
  return value !== null && typeof value === "object";
}
679
/**
 * Trim a string setting and treat blank or non-string values as absent.
 *
 * @param {unknown} value
 * @returns {string | undefined} The trimmed string, or undefined.
 */
function normalize(value) {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? undefined : trimmed;
}
684
/**
 * Resolve the Claude settings file location: the override named by
 * CLAUDE_SETTINGS_PATH_ENV when present in `env`, otherwise
 * `~/.claude/settings.json`.
 *
 * @param {Object<string, string | undefined>} env - Environment to read.
 * @returns {string}
 */
function resolveClaudeSettingsPath(env) {
  const override = env[CLAUDE_SETTINGS_PATH_ENV];
  if (override !== undefined && override !== null) return override;
  return path7.join(homedir(), ".claude", "settings.json");
}
687
/**
 * Read the Claude settings file synchronously as UTF-8 text.
 *
 * @param {string} settingsPath - File to read.
 * @returns {string | undefined} File content, or undefined when the file
 *   does not exist (ENOENT).
 * @throws {Error} For any other read failure, with the path in the message.
 */
function readClaudeSettingsFile(settingsPath) {
  try {
    return readFileSync(settingsPath, "utf8");
  } catch (err) {
    const isMissingFile = typeof err === "object" && err !== null && err.code === "ENOENT";
    if (isMissingFile) return undefined;
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to read Claude settings at "${settingsPath}": ${message}`);
  }
}
698
/**
 * Read the Anthropic-related values from the `env` section of the Claude
 * settings file.
 *
 * @param {Object<string, string | undefined>} [env=process.env] - Environment
 *   used to locate the settings file.
 * @returns {{ ANTHROPIC_API_KEY?: string, ANTHROPIC_AUTH_TOKEN?: string,
 *   ANTHROPIC_BASE_URL?: string, ANTHROPIC_MODEL?: string } | undefined}
 *   The normalised values, or undefined when the file is missing or empty,
 *   has no `env` object, or none of the four values is set.
 * @throws {Error} When the file cannot be read or is not valid JSON.
 */
function readClaudeSettingsEnv(env = process.env) {
  const settingsPath = resolveClaudeSettingsPath(env);
  const raw = readClaudeSettingsFile(settingsPath);
  if (!raw) return undefined;

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse Claude settings at "${settingsPath}": ${message}`);
  }
  if (!isRecord(parsed) || !isRecord(parsed.env)) return undefined;

  const keys = ["ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_BASE_URL", "ANTHROPIC_MODEL"];
  const values = {};
  for (const key of keys) {
    values[key] = normalize(parsed.env[key]);
  }
  const hasAnyValue = keys.some((key) => Boolean(values[key]));
  return hasAnyValue ? values : undefined;
}
723
/**
 * Best-effort variant of readClaudeSettingsEnv: any read or parse failure is
 * reported as "no settings" instead of being thrown.
 *
 * @param {Object<string, string | undefined>} env
 * @returns {object | undefined}
 */
function tryReadClaudeSettingsEnv(env) {
  let settings;
  try {
    settings = readClaudeSettingsEnv(env);
  } catch {
    // Deliberately ignored: these settings are an optional fallback here.
    settings = undefined;
  }
  return settings;
}
730
/**
 * Validate that a base URL parses and uses http or https.
 *
 * @param {string} value - Candidate base URL.
 * @returns {string} The trimmed URL.
 * @throws {Error} When the value is not a valid URL or uses another protocol.
 */
function validateAnthropicBaseURL(value) {
  const normalized = value.trim();
  let problem;
  try {
    const { protocol } = new URL(normalized);
    if (!["http:", "https:"].includes(protocol)) {
      problem = "Must use http:// or https:// protocol.";
    }
  } catch (err) {
    problem = err instanceof Error ? err.message : "Must be a valid http(s) URL.";
  }
  if (problem !== undefined) {
    throw new Error(`Invalid ANTHROPIC_BASE_URL: "${normalized}". ${problem}`);
  }
  return normalized;
}
743
/**
 * Resolve Anthropic credentials.
 *
 * Precedence: ANTHROPIC_API_KEY from `env`, ANTHROPIC_AUTH_TOKEN from `env`,
 * then the same two keys from the Claude settings file. The settings file is
 * read with the throwing reader, so a malformed file surfaces as an error.
 *
 * @param {Object<string, string | undefined>} [env=process.env]
 * @returns {{ apiKey: string } | { authToken: string } | {}}
 */
function resolveAnthropicAuthFromEnv(env = process.env) {
  const envApiKey = normalize(env.ANTHROPIC_API_KEY);
  if (envApiKey) return { apiKey: envApiKey };

  const envAuthToken = normalize(env.ANTHROPIC_AUTH_TOKEN);
  if (envAuthToken) return { authToken: envAuthToken };

  const settings = readClaudeSettingsEnv(env);
  const settingsApiKey = settings?.ANTHROPIC_API_KEY;
  if (settingsApiKey) return { apiKey: settingsApiKey };
  const settingsAuthToken = settings?.ANTHROPIC_AUTH_TOKEN;
  if (settingsAuthToken) return { authToken: settingsAuthToken };
  return {};
}
753
/**
 * Resolve the Anthropic model override.
 *
 * LLMWIKI_MODEL wins when it is set to a non-blank value (surrounding
 * whitespace is trimmed). A blank value is treated as unset, so it can no
 * longer be returned as an empty model name that defeats the caller's
 * default; resolution then falls back to ANTHROPIC_MODEL from the Claude
 * settings file, read on a best-effort basis.
 *
 * @param {Object<string, string | undefined>} [env=process.env]
 * @returns {string | undefined} The model name, or undefined when none is configured.
 */
function resolveAnthropicModelFromEnv(env = process.env) {
  const rawModel = env.LLMWIKI_MODEL;
  const explicitModel = typeof rawModel === "string" ? rawModel.trim() : "";
  if (explicitModel.length > 0) return explicitModel;
  return tryReadClaudeSettingsEnv(env)?.ANTHROPIC_MODEL;
}
758
/**
 * Resolve and validate the Anthropic base URL: ANTHROPIC_BASE_URL from `env`
 * first, otherwise the value from the Claude settings file (best effort).
 *
 * @param {Object<string, string | undefined>} [env=process.env]
 * @returns {string | undefined} The validated URL, or undefined when none is configured.
 * @throws {Error} When the configured URL fails validateAnthropicBaseURL.
 */
function resolveAnthropicBaseURLFromEnv(env = process.env) {
  // normalize() never yields an empty string, so `??` only falls through when the variable is effectively unset.
  const candidate = normalize(env.ANTHROPIC_BASE_URL) ?? tryReadClaudeSettingsEnv(env)?.ANTHROPIC_BASE_URL;
  return candidate ? validateAnthropicBaseURL(candidate) : undefined;
}
765
+
766
+ // src/utils/provider.ts
767
+ var SUPPORTED_PROVIDERS = /* @__PURE__ */ new Set(["anthropic", "openai", "ollama", "minimax"]);
768
/**
 * Construct the provider selected by getProviderName().
 *
 * @returns {object} An AnthropicProvider, OpenAIProvider, OllamaProvider or MiniMaxProvider.
 * @throws {Error} When the provider name has no factory, or when the chosen
 *   provider's configuration is invalid.
 */
function getProvider() {
  const providerName = getProviderName();
  // Factories are thunks so that only the selected provider reads its configuration.
  const factories = /* @__PURE__ */ new Map([
    ["anthropic", () => getAnthropicProvider()],
    [
      "openai",
      () => new OpenAIProvider(getModelForProvider("openai"), {
        baseURL: readOptionalEnv("OPENAI_BASE_URL"),
        embeddingsBaseURL: readOptionalEnv("OPENAI_EMBEDDINGS_BASE_URL"),
        embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
      })
    ],
    [
      "ollama",
      () => new OllamaProvider(getModelForProvider("ollama"), {
        baseURL: readOptionalEnv("OLLAMA_HOST") ?? OLLAMA_DEFAULT_HOST,
        embeddingsBaseURL: readOptionalEnv("OLLAMA_EMBEDDINGS_HOST"),
        embeddingModel: readOptionalEnv("LLMWIKI_EMBEDDING_MODEL")
      })
    ],
    ["minimax", () => getMiniMaxProvider()]
  ]);
  const create = factories.get(providerName);
  if (!create) {
    throw new Error(`Unhandled provider: ${providerName}`);
  }
  return create();
}
791
/**
 * Read an environment variable, trimming it and treating blank as unset.
 *
 * @param {string} name - Variable name.
 * @returns {string | undefined}
 */
function readOptionalEnv(name) {
  const raw = process.env[name];
  if (raw === undefined || raw === null) return undefined;
  const value = raw.trim();
  return value === "" ? undefined : value;
}
795
/**
 * Pick the chat model for a provider.
 *
 * LLMWIKI_MODEL overrides the provider default when it is set to a non-blank
 * value (surrounding whitespace is trimmed). A blank value is treated as
 * unset so it cannot select an empty model name.
 *
 * @param {string} providerName - Key into PROVIDER_MODELS.
 * @returns {string | undefined} The override, else the provider's default model.
 */
function getModelForProvider(providerName) {
  const override = process.env.LLMWIKI_MODEL?.trim();
  return override ? override : PROVIDER_MODELS[providerName];
}
798
/**
 * Construct the MiniMax provider from MINIMAX_API_KEY.
 *
 * The key is trimmed first, so a blank or whitespace-only value is reported
 * as missing instead of being sent to the API.
 *
 * @returns {object} A MiniMaxProvider.
 * @throws {Error} When MINIMAX_API_KEY is unset or blank.
 */
function getMiniMaxProvider() {
  const apiKey = process.env.MINIMAX_API_KEY?.trim();
  if (!apiKey) {
    throw new Error(
      "MiniMax provider requires MINIMAX_API_KEY environment variable.\n Set it with: export MINIMAX_API_KEY=your_key"
    );
  }
  return new MiniMaxProvider(getModelForProvider("minimax"), apiKey);
}
807
/**
 * Construct the Anthropic provider from the resolved model, base URL and
 * credentials. The model falls back to PROVIDER_MODELS.anthropic.
 *
 * @returns {object} An AnthropicProvider.
 * @throws {Error} When the base URL is invalid or the Claude settings file is unreadable or malformed.
 */
function getAnthropicProvider() {
  // Resolution order is kept as model, base URL, auth so the first failing step reports its own error.
  const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
  const baseURL = resolveAnthropicBaseURLFromEnv();
  const auth = resolveAnthropicAuthFromEnv();
  const clientOptions = { baseURL, ...auth };
  return new AnthropicProvider(model, clientOptions);
}
816
/**
 * Resolve and validate the active provider name.
 *
 * LLMWIKI_PROVIDER is trimmed; when it is unset or blank the default provider
 * is used, so an empty variable is not rejected as an unknown provider.
 *
 * @returns {string} A member of SUPPORTED_PROVIDERS.
 * @throws {Error} When the configured name is not supported.
 */
function getProviderName() {
  const configured = process.env.LLMWIKI_PROVIDER?.trim();
  const providerName = configured ? configured : DEFAULT_PROVIDER;
  if (!SUPPORTED_PROVIDERS.has(providerName)) {
    throw new Error(
      `Unknown provider "${providerName}". Supported: ${[...SUPPORTED_PROVIDERS].join(", ")}`
    );
  }
  return providerName;
}
825
/**
 * Return the validated name of the currently configured provider.
 *
 * @returns {string}
 * @throws {Error} When the configured provider is not supported.
 */
function getActiveProviderName() {
  const activeName = getProviderName();
  return activeName;
}
828
+
829
+ // src/utils/llm.ts
308
830
  function sleep(ms) {
309
831
  return new Promise((resolve) => setTimeout(resolve, ms));
310
832
  }
311
833
  async function callClaude(options) {
312
834
  const { system, messages, tools, maxTokens = 4096, stream = false, onToken } = options;
313
- const anthropic = getClient();
835
+ const provider = getProvider();
314
836
  for (let attempt = 0; attempt <= RETRY_COUNT; attempt++) {
315
837
  try {
316
838
  if (stream) {
317
- return await callClaudeStreaming(anthropic, system, messages, maxTokens, onToken);
839
+ return await provider.stream(system, messages, maxTokens, onToken);
318
840
  }
319
841
  if (tools && tools.length > 0) {
320
- return await callClaudeToolUse(anthropic, system, messages, tools, maxTokens);
842
+ return await provider.toolCall(system, messages, tools, maxTokens);
321
843
  }
322
- return await callClaudeBasic(anthropic, system, messages, maxTokens);
844
+ return await provider.complete(system, messages, maxTokens);
323
845
  } catch (error2) {
324
846
  if (attempt === RETRY_COUNT) throw error2;
325
847
  const delayMs = RETRY_BASE_MS * Math.pow(RETRY_MULTIPLIER, attempt);
@@ -331,57 +853,10 @@ async function callClaude(options) {
331
853
  }
332
854
  throw new Error("Unreachable");
333
855
  }
334
- async function callClaudeStreaming(anthropic, system, messages, maxTokens, onToken) {
335
- const stream = anthropic.messages.stream({
336
- model: MODEL,
337
- max_tokens: maxTokens,
338
- system,
339
- messages
340
- });
341
- let fullText = "";
342
- for await (const event of stream) {
343
- if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
344
- fullText += event.delta.text;
345
- onToken?.(event.delta.text);
346
- }
347
- }
348
- return fullText;
349
- }
350
- async function callClaudeToolUse(anthropic, system, messages, tools, maxTokens) {
351
- const response = await anthropic.messages.create({
352
- model: MODEL,
353
- max_tokens: maxTokens,
354
- system,
355
- messages,
356
- tools
357
- });
358
- const toolBlock = response.content.find((block) => block.type === "tool_use");
359
- if (toolBlock && toolBlock.type === "tool_use") {
360
- return JSON.stringify(toolBlock.input);
361
- }
362
- const textBlock = response.content.find((block) => block.type === "text");
363
- if (textBlock && textBlock.type === "text") {
364
- return textBlock.text;
365
- }
366
- return "";
367
- }
368
- async function callClaudeBasic(anthropic, system, messages, maxTokens) {
369
- const response = await anthropic.messages.create({
370
- model: MODEL,
371
- max_tokens: maxTokens,
372
- system,
373
- messages
374
- });
375
- const textBlock = response.content.find((block) => block.type === "text");
376
- if (textBlock && textBlock.type === "text") {
377
- return textBlock.text;
378
- }
379
- return "";
380
- }
381
856
 
382
857
  // src/utils/lock.ts
383
- import { open, readFile as readFile4, unlink, mkdir as mkdir4 } from "fs/promises";
384
- import path5 from "path";
858
+ import { open, readFile as readFile5, unlink, mkdir as mkdir4 } from "fs/promises";
859
+ import path8 from "path";
385
860
  var RECLAIM_SUFFIX = ".reclaim";
386
861
  var MAX_ACQUIRE_ATTEMPTS = 2;
387
862
  function isProcessAlive(pid) {
@@ -393,8 +868,8 @@ function isProcessAlive(pid) {
393
868
  }
394
869
  }
395
870
  async function acquireLock(root) {
396
- const lockPath = path5.join(root, LOCK_FILE);
397
- await mkdir4(path5.join(root, LLMWIKI_DIR), { recursive: true });
871
+ const lockPath = path8.join(root, LOCK_FILE);
872
+ await mkdir4(path8.join(root, LLMWIKI_DIR), { recursive: true });
398
873
  for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
399
874
  const created = await tryCreateLock(lockPath);
400
875
  if (created) return true;
@@ -457,7 +932,7 @@ async function tryCreateLock(lockPath) {
457
932
  }
458
933
  async function isLockStale(lockPath) {
459
934
  try {
460
- const content = await readFile4(lockPath, "utf-8");
935
+ const content = await readFile5(lockPath, "utf-8");
461
936
  const pid = parseInt(content.trim(), 10);
462
937
  if (isNaN(pid)) return true;
463
938
  return !isProcessAlive(pid);
@@ -466,7 +941,7 @@ async function isLockStale(lockPath) {
466
941
  }
467
942
  }
468
943
  async function releaseLock(root) {
469
- const lockPath = path5.join(root, LOCK_FILE);
944
+ const lockPath = path8.join(root, LOCK_FILE);
470
945
  try {
471
946
  await unlink(lockPath);
472
947
  } catch {
@@ -474,6 +949,12 @@ async function releaseLock(root) {
474
949
  }
475
950
 
476
951
  // src/compiler/prompts.ts
952
/** Allowed values for the `provenance_state` enum in the concept-extraction tool schema. */
var PROVENANCE_STATE_VALUES = ["extracted", "merged", "inferred", "ambiguous"];
477
958
  var CONCEPT_EXTRACTION_TOOL = {
478
959
  name: "extract_concepts",
479
960
  description: "Extract knowledge concepts from a source document",
@@ -496,6 +977,36 @@ var CONCEPT_EXTRACTION_TOOL = {
496
977
  is_new: {
497
978
  type: "boolean",
498
979
  description: "True if this is a new concept not in existing wiki"
980
+ },
981
+ tags: {
982
+ type: "array",
983
+ items: { type: "string" },
984
+ description: "2-4 categorical tags for organizing this concept (e.g., 'machine-learning', 'optimization')"
985
+ },
986
+ confidence: {
987
+ type: "number",
988
+ description: "Confidence in this concept on a 0..1 scale (1 = directly stated, 0 = highly speculative)."
989
+ },
990
+ provenance_state: {
991
+ type: "string",
992
+ enum: PROVENANCE_STATE_VALUES,
993
+ description: "How this concept was produced: 'extracted' (direct from source), 'merged' (synthesised across sources), 'inferred' (model deduction), or 'ambiguous' (sources disagree)."
994
+ },
995
+ contradicted_by: {
996
+ type: "array",
997
+ items: {
998
+ type: "object",
999
+ properties: {
1000
+ slug: { type: "string", description: "Slug of the contradicting concept." },
1001
+ reason: { type: "string", description: "Brief reason for the contradiction." }
1002
+ },
1003
+ required: ["slug"]
1004
+ },
1005
+ description: "Slugs of other concepts whose evidence contradicts this one."
1006
+ },
1007
+ inferred_paragraphs: {
1008
+ type: "integer",
1009
+ description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
499
1010
  }
500
1011
  },
501
1012
  required: ["concept", "summary", "is_new"]
@@ -517,6 +1028,17 @@ ${existingIndex}` : "\n\nNo existing wiki pages yet.";
517
1028
  "Each concept should be a standalone topic that someone might look up.",
518
1029
  "Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
519
1030
  "Use the extract_concepts tool to return your findings.",
1031
+ "",
1032
+ "For every concept, emit provenance metadata so downstream tools can reason",
1033
+ "about reliability:",
1034
+ " - confidence: 0..1 \u2014 how certain you are the source supports this concept.",
1035
+ " - provenance_state: 'extracted' if directly stated, 'merged' if synthesised",
1036
+ " from multiple parts of the source, 'inferred' if reasoned from context,",
1037
+ " or 'ambiguous' if the source is contradictory or unclear.",
1038
+ " - contradicted_by: slugs of other concepts (in this batch or the index)",
1039
+ " whose evidence conflicts with this one.",
1040
+ " - inferred_paragraphs: estimated number of paragraphs in the resulting",
1041
+ " page that will be inferred rather than directly citable.",
520
1042
  indexSection,
521
1043
  "\n\n--- SOURCE DOCUMENT ---\n\n",
522
1044
  sourceContent
@@ -539,64 +1061,60 @@ ${relatedPages}` : "";
539
1061
  "Include a ## Sources section at the end listing the source document.",
540
1062
  "Suggest [[wikilinks]] to related concepts where appropriate.",
541
1063
  "Write in a neutral, informative tone. Be concise but thorough.",
1064
+ "",
1065
+ "Source attribution: at the end of each prose paragraph, append a citation",
1066
+ "marker showing which source file(s) the paragraph drew from.",
1067
+ "Format: ^[filename.md] for single-source, ^[source-a.md, source-b.md] for multi-source.",
1068
+ "Place citations only at the end of prose paragraphs \u2014 not on headings, list items, or code blocks.",
1069
+ "Source filenames are visible as `--- SOURCE: filename.md ---` headers in the content below.",
1070
+ "",
1071
+ "If a paragraph is your inference rather than a direct extraction, leave it",
1072
+ "uncited \u2014 downstream lint rules will count uncited paragraphs as 'inferred'",
1073
+ "to compute the page's provenance metadata.",
542
1074
  existingSection,
543
1075
  relatedSection,
544
1076
  "\n\n--- SOURCE MATERIAL ---\n\n",
545
1077
  sourceContent
546
1078
  ].join("\n");
547
1079
  }
548
- function parseConcepts(toolOutput) {
549
- try {
550
- const parsed = JSON.parse(toolOutput);
551
- const concepts = parsed.concepts ?? [];
552
- return concepts.filter(
553
- (c) => typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean"
554
- );
555
- } catch {
556
- return [];
557
- }
558
- }
559
-
560
- // src/compiler/hasher.ts
561
- import { createHash } from "crypto";
562
- import { readFile as readFile5, readdir } from "fs/promises";
563
- import path6 from "path";
564
- async function hashFile(filePath) {
565
- const content = await readFile5(filePath, "utf-8");
566
- return createHash("sha256").update(content).digest("hex");
1080
/**
 * Checks that a raw concept object returned by the extraction tool has the
 * minimum required shape.
 *
 * @param {unknown} c - One element of the tool output's `concepts` array.
 * @returns {boolean} True when `concept` and `summary` are strings, `is_new`
 *   is a boolean, and `tags` is either absent or an array.
 */
function isValidRawConcept(c) {
  // Reject null/undefined/primitives up front: a property access on null would
  // throw, and parseConcepts' catch would then discard the entire batch.
  if (c === null || typeof c !== "object") return false;
  return typeof c.concept === "string" && typeof c.summary === "string" && typeof c.is_new === "boolean" && (c.tags === void 0 || Array.isArray(c.tags));
}
568
- async function detectChanges(root, prevState) {
569
- const sourcesPath = path6.join(root, SOURCES_DIR);
570
- const currentFiles = await listSourceFiles(sourcesPath);
571
- const changes = [];
572
- for (const file of currentFiles) {
573
- const status2 = await classifyFile(root, file, prevState);
574
- changes.push({ file, status: status2 });
1083
/**
 * Normalises the raw `contradicted_by` value emitted by the model into a list
 * of `{ slug, reason? }` references.
 *
 * Accepts both object entries (`{ slug, reason }`) and bare slug strings: the
 * extraction prompt describes the field as "slugs of other concepts", so a
 * model may return plain strings rather than objects.
 *
 * @param {unknown} raw - Value of `contradicted_by` from the tool output.
 * @returns {Array<{slug: string, reason?: string}>|undefined} Cleaned
 *   references, or undefined when `raw` is not an array or nothing usable
 *   remains.
 */
function coerceContradictedBy(raw) {
  if (!Array.isArray(raw)) return void 0;
  const refs = [];
  for (const entry of raw) {
    if (typeof entry === "string") {
      const slug = entry.trim();
      if (slug.length > 0) refs.push({ slug });
      continue;
    }
    if (!entry || typeof entry !== "object") continue;
    if (typeof entry.slug !== "string") continue;
    const slug = entry.slug.trim();
    // Whitespace-only slugs carry no information; drop them.
    if (slug.length === 0) continue;
    const ref = { slug };
    if (typeof entry.reason === "string") ref.reason = entry.reason;
    refs.push(ref);
  }
  return refs.length > 0 ? refs : void 0;
}
580
- async function listSourceFiles(sourcesPath) {
1096
/**
 * Converts a validated raw concept (snake_case tool output) into the internal
 * camelCase concept record, dropping any provenance field that is malformed.
 *
 * @param {object} c - Raw concept that already passed isValidRawConcept.
 * @returns {object} Record with `concept`, `summary`, `is_new`, `tags`,
 *   `confidence`, `provenanceState`, `contradictedBy`, `inferredParagraphs`;
 *   optional fields are undefined when missing or invalid.
 */
function mapRawConcept(c) {
  const provenance = typeof c.provenance_state === "string" && PROVENANCE_STATE_VALUES.includes(c.provenance_state) ? c.provenance_state : void 0;
  // The prompt asks for a confidence in 0..1; discard NaN/Infinity and clamp
  // out-of-range numbers so the stored value always respects that range.
  let confidence;
  if (typeof c.confidence === "number" && Number.isFinite(c.confidence)) {
    confidence = Math.min(1, Math.max(0, c.confidence));
  }
  const paragraphs = c.inferred_paragraphs;
  const hasValidParagraphCount = typeof paragraphs === "number" && Number.isInteger(paragraphs) && paragraphs >= 0;
  return {
    concept: c.concept,
    summary: c.summary,
    is_new: c.is_new,
    tags: Array.isArray(c.tags) ? c.tags : void 0,
    confidence,
    provenanceState: provenance,
    contradictedBy: coerceContradictedBy(c.contradicted_by),
    inferredParagraphs: hasValidParagraphCount ? paragraphs : void 0
  };
}
1109
/**
 * Parses the JSON string produced by the concept-extraction tool call.
 *
 * @param {string} toolOutput - JSON text expected to hold a `concepts` array.
 * @returns {object[]} Mapped concepts that passed validation; an empty array
 *   when the JSON is invalid or has an unexpected shape (best-effort parse).
 */
function parseConcepts(toolOutput) {
  try {
    const rawList = JSON.parse(toolOutput).concepts ?? [];
    const accepted = [];
    for (const candidate of rawList) {
      if (isValidRawConcept(candidate)) accepted.push(mapRawConcept(candidate));
    }
    return accepted;
  } catch {
    // Malformed tool output is treated as "no concepts" rather than an error.
    return [];
  }
}
588
- async function classifyFile(root, file, prevState) {
589
- const filePath = path6.join(root, SOURCES_DIR, file);
590
- const hash = await hashFile(filePath);
591
- const prev = prevState.sources[file];
592
- if (!prev) return "new";
593
- if (prev.hash !== hash) return "changed";
594
- return "unchanged";
595
- }
596
- function findDeletedFiles(currentFiles, prevState) {
597
- const currentSet = new Set(currentFiles);
598
- return Object.keys(prevState.sources).filter((file) => !currentSet.has(file)).map((file) => ({ file, status: "deleted" }));
599
- }
600
1118
 
601
1119
  // src/compiler/deps.ts
602
1120
  function buildConceptToSourcesMap(sources) {
@@ -613,28 +1131,37 @@ function buildConceptToSourcesMap(sources) {
613
1131
  }
614
1132
  return conceptMap;
615
1133
  }
616
- function findAffectedSources(state, directChanges) {
617
- const changedFiles = new Set(
618
- directChanges.filter((c) => c.status === "new" || c.status === "changed").map((c) => c.file)
619
- );
620
- const deletedFiles = new Set(
621
- directChanges.filter((c) => c.status === "deleted").map((c) => c.file)
1134
/**
 * Collects the file names of all change records whose status is one of the
 * given statuses.
 *
 * @param {Array<{file: string, status: string}>} changes - Change records.
 * @param {...string} statuses - Statuses to keep.
 * @returns {Set<string>} Files whose status matched.
 */
function filesByStatus(changes, ...statuses) {
  const wanted = new Set(statuses);
  const files = new Set();
  for (const change of changes) {
    if (wanted.has(change.status)) files.add(change.file);
  }
  return files;
}
1140
/**
 * For every concept of `sourceFile` that is shared by two or more sources,
 * adds the other contributing sources to `out`, skipping any file present in
 * one of the exclude sets.
 *
 * @param {string} sourceFile - Key into `state.sources`.
 * @param {{sources: Object<string, {concepts: Iterable<string>}>}} state
 * @param {Map<string, string[]>} conceptMap - Concept slug -> contributing files.
 * @param {Array<Set<string>>} excludeSets - Files in any of these are skipped.
 * @param {Set<string>} out - Receives the collected files (mutated in place).
 * @returns {void}
 */
function collectSharedContributors(sourceFile, state, conceptMap, excludeSets, out) {
  const entry = state.sources[sourceFile];
  if (!entry) return;
  for (const conceptSlug of entry.concepts) {
    const sharers = conceptMap.get(conceptSlug);
    // Concepts with a single contributor are not shared, so nothing to add.
    if (!sharers || sharers.length < 2) continue;
    for (const file of sharers) {
      if (excludeSets.every((excluded) => !excluded.has(file))) out.add(file);
    }
  }
}
1152
/**
 * Finds sources that were not directly changed but share a concept with a new
 * or changed source.
 *
 * @param {{sources: object}} state - Previous compile state.
 * @param {Array<{file: string, status: string}>} directChanges - Detected changes.
 * @returns {string[]} Files that share concepts with new/changed files,
 *   excluding files that are themselves new, changed or deleted.
 */
function findAffectedSources(state, directChanges) {
  const touched = filesByStatus(directChanges, "new", "changed");
  const removed = filesByStatus(directChanges, "deleted");
  const ownersBySlug = buildConceptToSourcesMap(state.sources);
  const affected = new Set();
  const excluded = [touched, removed, affected];
  touched.forEach((file) => {
    collectSharedContributors(file, state, ownersBySlug, excluded, affected);
  });
  return [...affected];
}
@@ -676,36 +1203,36 @@ async function persistFrozenSlugs(root, frozenSlugs, successfulExtractions) {
676
1203
  const stateToSave = { ...currentState, frozenSlugs: Array.from(remaining) };
677
1204
  await writeState(root, stateToSave);
678
1205
  }
679
- function findLateAffectedSources(extractions, state, allChanges) {
680
- const compilingFiles = new Set(
681
- allChanges.filter((c) => c.status === "new" || c.status === "changed").map((c) => c.file)
682
- );
683
- const deletedFiles = new Set(
684
- allChanges.filter((c) => c.status === "deleted").map((c) => c.file)
685
- );
686
- const conceptMap = buildConceptToSourcesMap(state.sources);
1206
/**
 * Collects the slugs of concepts that an extraction produced for a source but
 * that were not recorded for that source in the previous state.
 *
 * @param {Array<{sourceFile: string, concepts: Array<{concept: string}>}>} extractions
 * @param {{sources: Object<string, {concepts: string[]}>}} state
 * @returns {Set<string>} Slugs newly associated with their source.
 */
function collectFreshSlugs(extractions, state) {
  const fresh = new Set();
  for (const extraction of extractions) {
    const known = new Set(state.sources[extraction.sourceFile]?.concepts ?? []);
    for (const item of extraction.concepts) {
      const slug = slugify(item.concept);
      if (known.has(slug)) continue;
      fresh.add(slug);
    }
  }
  return fresh;
}
1217
/**
 * Returns every source file that owns one of the given slugs, except files
 * contained in any of the exclude sets.
 *
 * @param {Iterable<string>} slugs - Concept slugs to look up.
 * @param {Map<string, string[]>} conceptMap - Concept slug -> owning files.
 * @param {Array<Set<string>>} excludeSets - Files in any of these are skipped.
 * @returns {string[]} Distinct owning files in first-seen order.
 */
function findSlugOwners(slugs, conceptMap, excludeSets) {
  const found = new Set();
  for (const slug of slugs) {
    for (const file of conceptMap.get(slug) || []) {
      if (!excludeSets.some((excluded) => excluded.has(file))) found.add(file);
    }
  }
  return [...found];
}
1229
/**
 * After extraction, finds existing sources that already own a concept which a
 * compiled source has just started producing.
 *
 * @param {Array<{sourceFile: string, concepts: object[]}>} extractions
 * @param {{sources: object}} state - Previous compile state.
 * @param {Array<{file: string, status: string}>} allChanges
 * @returns {string[]} Owners of freshly introduced slugs, excluding files that
 *   are new, changed or deleted in `allChanges`.
 */
function findLateAffectedSources(extractions, state, allChanges) {
  const excluded = [
    filesByStatus(allChanges, "new", "changed"),
    filesByStatus(allChanges, "deleted")
  ];
  const ownersBySlug = buildConceptToSourcesMap(state.sources);
  return findSlugOwners(collectFreshSlugs(extractions, state), ownersBySlug, excluded);
}
709
1236
  function findSharedConcepts(sourceFile, state) {
710
1237
  const shared = /* @__PURE__ */ new Set();
711
1238
  const sourceEntry = state.sources[sourceFile];
@@ -735,7 +1262,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
735
1262
  }
736
1263
 
737
1264
  // src/compiler/orphan.ts
738
- import path7 from "path";
1265
+ import path9 from "path";
739
1266
  async function markOrphaned(root, sourceFile, state) {
740
1267
  const sourceEntry = state.sources[sourceFile];
741
1268
  if (!sourceEntry) return;
@@ -761,7 +1288,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
761
1288
  }
762
1289
  }
763
1290
  async function orphanPage(root, slug, reason) {
764
- const pagePath = path7.join(root, CONCEPTS_DIR, `${slug}.md`);
1291
+ const pagePath = path9.join(root, CONCEPTS_DIR, `${slug}.md`);
765
1292
  const content = await safeReadFile(pagePath);
766
1293
  if (!content) return;
767
1294
  const { meta } = parseFrontmatter(content);
@@ -773,16 +1300,16 @@ async function orphanPage(root, slug, reason) {
773
1300
 
774
1301
  // src/compiler/resolver.ts
775
1302
  import { readdir as readdir2, readFile as readFile6 } from "fs/promises";
776
- import path8 from "path";
1303
+ import path10 from "path";
777
1304
  import { existsSync as existsSync2 } from "fs";
778
1305
  async function buildTitleIndex(root) {
779
- const conceptsDir = path8.join(root, CONCEPTS_DIR);
1306
+ const conceptsDir = path10.join(root, CONCEPTS_DIR);
780
1307
  if (!existsSync2(conceptsDir)) return [];
781
1308
  const files = await readdir2(conceptsDir);
782
1309
  const pages = [];
783
1310
  for (const file of files) {
784
1311
  if (!file.endsWith(".md")) continue;
785
- const filePath = path8.join(conceptsDir, file);
1312
+ const filePath = path10.join(conceptsDir, file);
786
1313
  const content = await readFile6(filePath, "utf-8");
787
1314
  const { meta } = parseFrontmatter(content);
788
1315
  if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
@@ -802,25 +1329,41 @@ function isInsideWikilink(text, position) {
802
1329
  const closeBefore = text.indexOf("]]", before);
803
1330
  return closeBefore >= position;
804
1331
  }
1332
/**
 * Tells whether `position` falls inside a `^[...]` citation marker.
 *
 * @param {string} text - Text being scanned.
 * @param {number} position - Character index to test.
 * @returns {boolean} True when the nearest `^[` at or before `position` has
 *   its first closing `]` at or after `position`.
 */
function isInsideCitation(text, position) {
  const openIndex = text.lastIndexOf("^[", position);
  if (openIndex === -1) return false;
  // Without any "]" at or after the position there is no marker to be inside of.
  if (text.indexOf("]", position) === -1) return false;
  return text.indexOf("]", openIndex) >= position;
}
805
1339
/**
 * Checks that the span [start, end) is delimited on both sides by the edge of
 * the text or by a whitespace/punctuation character.
 *
 * @param {string} text
 * @param {number} start - Index of the first character of the span.
 * @param {number} end - Index just past the last character of the span.
 * @returns {boolean} True when both sides are boundaries.
 */
function isWordBoundary(text, start, end) {
  const delimiter = /[\s,.:;!?()\[\]{}/"']/;
  const leftOk = start === 0 ? true : delimiter.test(text[start - 1]);
  if (!leftOk) return false;
  return end >= text.length ? true : delimiter.test(text[end]);
}
810
- function addWikilinks(body, titles, selfTitle) {
1344
/**
 * Finds every case-insensitive, literal occurrence of `title` in `text`.
 *
 * @param {string} text - Text to search.
 * @param {string} title - Title to look for; regex metacharacters are escaped.
 * @returns {Array<{start: number, end: number}>} Match spans in ascending
 *   order, `end` exclusive. Empty when `title` is the empty string.
 */
function findTitleMatches(text, title) {
  // An empty pattern matches with zero length at every index; a manual
  // exec() loop would never advance lastIndex and spin forever.
  if (title.length === 0) return [];
  const escaped = title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const regex = new RegExp(escaped, "gi");
  return Array.from(text.matchAll(regex), (match) => ({
    start: match.index,
    end: match.index + match[0].length
  }));
}
1354
/**
 * Decides whether a title match at [start, end) may be turned into a wikilink.
 *
 * @param {string} text
 * @param {number} start - Match start index.
 * @param {number} end - Match end index (exclusive).
 * @returns {boolean} False when the match starts inside an existing wikilink
 *   or citation marker; otherwise the result of the word-boundary check.
 */
function isLinkablePosition(text, start, end) {
  const insideExistingMarkup = isInsideWikilink(text, start) || isInsideCitation(text, start);
  return insideExistingMarkup ? false : isWordBoundary(text, start, end);
}
1359
+ function addWikilinks(body, titles, selfTitle) {
811
1360
  let result = body;
1361
+ const selfLower = selfTitle.toLowerCase();
812
1362
  for (const page of titles) {
813
- if (page.title.toLowerCase() === selfTitle.toLowerCase()) continue;
814
- const escaped = page.title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
815
- const regex = new RegExp(escaped, "gi");
816
- let match;
817
- const matches = [];
818
- while ((match = regex.exec(result)) !== null) {
819
- matches.push({ start: match.index, end: match.index + match[0].length });
820
- }
1363
+ if (page.title.toLowerCase() === selfLower) continue;
1364
+ const matches = findTitleMatches(result, page.title);
821
1365
  for (const m of matches.reverse()) {
822
- if (isInsideWikilink(result, m.start)) continue;
823
- if (!isWordBoundary(result, m.start, m.end)) continue;
1366
+ if (!isLinkablePosition(result, m.start, m.end)) continue;
824
1367
  result = result.slice(0, m.start) + `[[${page.title}]]` + result.slice(m.end);
825
1368
  }
826
1369
  }
@@ -876,41 +1419,43 @@ async function linkPage(page, titleIndex) {
876
1419
 
877
1420
  // src/compiler/indexgen.ts
878
1421
  import { readdir as readdir3 } from "fs/promises";
879
- import path9 from "path";
1422
+ import path11 from "path";
880
1423
/**
 * Rebuilds the wiki index file from the concept and query pages under `root`.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<void>} Resolves once the index file has been written.
 */
async function generateIndex(root) {
  status("*", info("Generating index..."));
  const conceptsPath = path11.join(root, CONCEPTS_DIR);
  const queriesPath = path11.join(root, QUERIES_DIR);
  const concepts = await collectPageSummaries(conceptsPath);
  const queries = await collectPageSummaries(queriesPath);
  // Both sections are listed alphabetically by title.
  const byTitle = (left, right) => left.title.localeCompare(right.title);
  concepts.sort(byTitle);
  queries.sort(byTitle);
  const indexContent = buildIndexContent(concepts, queries);
  await atomicWrite(path11.join(root, INDEX_FILE), indexContent);
  const total = concepts.length + queries.length;
  status("+", success(`Index updated with ${total} pages.`));
}
894
- async function collectPageSummaries(conceptsPath) {
1437
/**
 * Reads every `.md` file in a directory and returns its slug and parsed
 * frontmatter.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Promise<Array<{slug: string, meta: object}>>} One entry per
 *   markdown file; an empty array when the directory cannot be read.
 */
async function scanWikiPages(dirPath) {
  let names;
  try {
    names = await readdir3(dirPath);
  } catch {
    // An unreadable (e.g. missing) directory simply contributes no pages.
    return [];
  }
  const pages = [];
  const markdownNames = names.filter((name) => name.endsWith(".md"));
  for (const name of markdownNames) {
    const raw = await safeReadFile(path11.join(dirPath, name));
    const { meta } = parseFrontmatter(raw);
    pages.push({ slug: name.replace(/\.md$/, ""), meta });
  }
  return pages;
}
1452
/**
 * Builds index summaries for the non-orphaned pages of a directory that have
 * a non-empty string title.
 *
 * @param {string} conceptsPath - Directory to scan.
 * @returns {Promise<Array<{title: string, slug: string, summary: string}>>}
 *   `summary` is "" when the frontmatter has no string summary.
 */
async function collectPageSummaries(conceptsPath) {
  const summaries = [];
  for (const { slug, meta } of await scanWikiPages(conceptsPath)) {
    const hasTitle = meta.title && typeof meta.title === "string";
    if (!hasTitle || meta.orphaned) continue;
    summaries.push({
      title: meta.title,
      slug,
      summary: typeof meta.summary === "string" ? meta.summary : ""
    });
  }
  return summaries;
}
915
1460
  function stripWikilinks(text) {
916
1461
  return text.replace(/\[\[([^\]]+)\]\]/g, "$1");
@@ -933,84 +1478,611 @@ function buildIndexContent(concepts, queries) {
933
1478
  return lines.join("\n");
934
1479
  }
935
1480
 
1481
+ // src/compiler/obsidian.ts
1482
+ import { readdir as readdir4 } from "fs/promises";
1483
+ import path12 from "path";
1484
// Titles with fewer words than this get no initials-based alias.
var ABBREVIATION_MIN_WORDS = 3;
// Space-padded, lower-case conjunctions around which a title's two halves can
// be swapped to form an alias.
var SWAP_CONJUNCTIONS = [" and ", " or "];
1486
/**
 * Adds `tags` and generated `aliases` to a frontmatter object in place.
 *
 * @param {object} frontmatter - Frontmatter fields to mutate.
 * @param {string} conceptTitle - Title used to derive the aliases.
 * @param {string[]} tags - Tags stored as-is.
 * @returns {void}
 */
function addObsidianMeta(frontmatter, conceptTitle, tags) {
  frontmatter.tags = tags;
  const aliases = generateAliases(conceptTitle);
  frontmatter.aliases = aliases;
}
1490
/**
 * Produces alternative names for a concept title: its slug (when different
 * from the title), a conjunction-swapped variant, and an initials abbreviation.
 *
 * @param {string} title - Concept title.
 * @returns {string[]} Aliases in the order slug, swap alias, abbreviation;
 *   variants that do not apply are omitted.
 */
function generateAliases(title) {
  const slug = slugify(title);
  const aliases = slug !== title ? [slug] : [];
  const optionalAliases = [generateSwapAlias(title), generateAbbreviation(title)];
  for (const alias of optionalAliases) {
    if (alias) aliases.push(alias);
  }
  return aliases;
}
1506
/**
 * Swaps the parts of a title around the first matching conjunction from
 * SWAP_CONJUNCTIONS, e.g. "A and B" -> "B and A".
 *
 * @param {string} title - Concept title.
 * @returns {string|null} The swapped title (keeping the conjunction's
 *   original casing), or null when no conjunction occurs in the title.
 */
function generateSwapAlias(title) {
  const lowered = title.toLowerCase();
  for (const conjunction of SWAP_CONJUNCTIONS) {
    const at = lowered.indexOf(conjunction);
    if (at === -1) continue;
    const cut = at + conjunction.length;
    // Slice from the original title so the conjunction keeps its casing.
    const joiner = title.slice(at, cut);
    return `${title.slice(cut)}${joiner}${title.slice(0, at)}`;
  }
  return null;
}
1517
/**
 * Builds an upper-case initials abbreviation for multi-word titles,
 * e.g. "Large Language Models" -> "LLM".
 *
 * @param {string} title - Concept title.
 * @returns {string|null} The abbreviation, or null when the title has fewer
 *   than ABBREVIATION_MIN_WORDS words or the abbreviation equals the title.
 */
function generateAbbreviation(title) {
  // Leading/trailing whitespace makes split() yield empty strings; drop them
  // so that `word[0]` is always defined and padding does not inflate the count.
  const words = title.split(/\s+/).filter((word) => word.length > 0);
  if (words.length < ABBREVIATION_MIN_WORDS) return null;
  const abbreviation = words.map((word) => word[0].toUpperCase()).join("");
  if (abbreviation === title) return null;
  return abbreviation;
}
1524
/**
 * Writes the "Map of Content" file, listing concept pages grouped by tag.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<void>} Resolves once the file has been written.
 */
async function generateMOC(root) {
  const pages = await loadConceptPages(path12.join(root, CONCEPTS_DIR));
  const content = buildMOCContent(groupPagesByTag(pages));
  await atomicWrite(path12.join(root, MOC_FILE), content);
}
1531
/**
 * Loads title and tags for every readable, non-orphaned `.md` page in a
 * directory.
 *
 * @param {string} conceptsPath - Directory containing concept pages.
 * @returns {Promise<Array<{title: string, tags: Array}>>} Title falls back to
 *   the file name without `.md`; tags fall back to an empty array. Empty when
 *   the directory cannot be read.
 */
async function loadConceptPages(conceptsPath) {
  let names;
  try {
    names = await readdir4(conceptsPath);
  } catch {
    return [];
  }
  const pages = [];
  for (const name of names.filter((entry) => entry.endsWith(".md"))) {
    const raw = await safeReadFile(path12.join(conceptsPath, name));
    if (!raw) continue;
    const { meta } = parseFrontmatter(raw);
    if (meta.orphaned) continue;
    pages.push({
      title: typeof meta.title === "string" ? meta.title : name.replace(/\.md$/, ""),
      tags: Array.isArray(meta.tags) ? meta.tags : []
    });
  }
  return pages;
}
1551
/**
 * Groups page titles by tag; pages without tags go under "Uncategorized".
 *
 * @param {Array<{title: string, tags: string[]}>} pages
 * @returns {Map<string, string[]>} Tag -> titles, in encounter order.
 */
function groupPagesByTag(pages) {
  const groups = new Map();
  for (const { title, tags } of pages) {
    const keys = tags.length === 0 ? ["Uncategorized"] : tags;
    for (const key of keys) {
      appendToGroup(groups, key, title);
    }
  }
  return groups;
}
1564
/**
 * Appends `title` to the list stored under `key`, creating the list on first
 * use.
 *
 * @param {Map<string, string[]>} groups - Mutated in place.
 * @param {string} key - Group name.
 * @param {string} title - Title to append.
 * @returns {void}
 */
function appendToGroup(groups, key, title) {
  const bucket = groups.get(key);
  if (!bucket) {
    groups.set(key, [title]);
    return;
  }
  bucket.push(title);
}
1572
/**
 * Renders the Map of Content markdown: one `## tag` section per group with a
 * sorted bullet list of wikilinks. "Uncategorized" is always listed last.
 *
 * @param {Map<string, string[]>} tagGroups - Tag -> page titles. Not modified.
 * @returns {string} Markdown document text.
 */
function buildMOCContent(tagGroups) {
  const lines = ["# Map of Content", ""];
  const sortedTags = [...tagGroups.keys()].sort((a, b) => {
    if (a === "Uncategorized") return 1;
    if (b === "Uncategorized") return -1;
    return a.localeCompare(b);
  });
  for (const tag of sortedTags) {
    const titles = tagGroups.get(tag) ?? [];
    lines.push(`## ${tag}`, "");
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the arrays owned by the caller's map.
    for (const title of [...titles].sort()) {
      lines.push(`- [[${title}]]`);
    }
    lines.push("");
  }
  return lines.join("\n");
}
1589
+
1590
+ // src/utils/embeddings.ts
1591
+ import { readFile as readFile7, readdir as readdir5 } from "fs/promises";
1592
+ import { existsSync as existsSync3 } from "fs";
1593
+ import path13 from "path";
1594
/**
 * Cosine similarity of two numeric vectors.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} Similarity; 0 when the vectors are empty, differ in
 *   length, or either has zero magnitude.
 */
function cosineSimilarity(a, b) {
  const size = a.length;
  if (size === 0 || size !== b.length) return 0;
  let dot = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (const [index, x] of a.entries()) {
    const y = b[index];
    dot += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }
  // A zero-magnitude vector has no direction; avoid dividing by zero.
  if (sumSquaresA === 0 || sumSquaresB === 0) return 0;
  return dot / (Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB));
}
1607
/**
 * Returns the `k` store entries whose vectors are most similar to `queryVec`.
 *
 * @param {number[]} queryVec - Query embedding.
 * @param {{entries: Array<{vector: number[]}>}} store - Embedding store.
 * @param {number} k - Maximum number of entries to return.
 * @returns {object[]} Entries ordered by descending cosine similarity.
 */
function findTopK(queryVec, store, k) {
  return store.entries
    .map((entry) => [cosineSimilarity(queryVec, entry.vector), entry])
    .sort(([leftScore], [rightScore]) => rightScore - leftScore)
    .slice(0, k)
    .map(([, entry]) => entry);
}
1615
/**
 * Loads the embedding store JSON from under `root`.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object|null>} Parsed store, or null when the file does
 *   not exist. Read and JSON parse errors propagate to the caller.
 */
async function readEmbeddingStore(root) {
  const storePath = path13.join(root, EMBEDDINGS_FILE);
  return existsSync3(storePath) ? JSON.parse(await readFile7(storePath, "utf-8")) : null;
}
1621
/**
 * Persists the embedding store as pretty-printed JSON under `root`.
 *
 * @param {string} root - Wiki root directory.
 * @param {object} store - Store to serialise.
 * @returns {Promise<void>}
 */
async function writeEmbeddingStore(root, store) {
  const target = path13.join(root, EMBEDDINGS_FILE);
  const serialized = JSON.stringify(store, null, 2);
  await atomicWrite(target, serialized);
}
1625
/**
 * Finds the stored pages most similar to a question using the embedding store.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} question - Text to embed and compare against stored pages.
 * @returns {Promise<Array<{slug: string, title: string, summary: string}>>}
 *   Up to EMBEDDING_TOP_K pages; empty when the store is missing, empty, or
 *   was built with a different embedding model (a warning is emitted then).
 */
async function findRelevantPages(root, question) {
  const store = await readEmbeddingStore(root);
  if (!store || store.entries.length === 0) return [];
  const activeModel = resolveEmbeddingModel();
  if (store.model !== activeModel) {
    warnStaleEmbeddingStore(store.model, activeModel);
    return [];
  }
  const queryVec = await getProvider().embed(question);
  const nearest = findTopK(queryVec, store, EMBEDDING_TOP_K);
  return nearest.map(({ slug, title, summary }) => ({ slug, title, summary }));
}
1640
/**
 * Collects slug/title/summary records for all non-orphaned, titled `.md`
 * pages in the concepts and queries directories.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<Array<{slug: string, title: string, summary: string}>>}
 *   Directories that cannot be read are skipped.
 */
async function collectPageRecords(root) {
  const records = [];
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
    const absDir = path13.join(root, dir);
    let names;
    try {
      names = await readdir5(absDir);
    } catch {
      continue;
    }
    const markdownNames = names.filter((name) => name.endsWith(".md"));
    for (const name of markdownNames) {
      const raw = await safeReadFile(path13.join(absDir, name));
      const { meta } = parseFrontmatter(raw);
      const isLive = !meta.orphaned && typeof meta.title === "string";
      if (!isLive) continue;
      records.push({
        slug: name.replace(/\.md$/, ""),
        title: meta.title,
        summary: typeof meta.summary === "string" ? meta.summary : ""
      });
    }
  }
  return records;
}
1663
/**
 * Builds the text that is embedded for a page.
 *
 * @param {{title: string, summary: string}} record
 * @returns {string} Title and summary separated by a blank line, or just the
 *   title when the summary is empty.
 */
function buildEmbeddingText(record) {
  const { title, summary } = record;
  if (!summary) return title;
  return `${title}\n\n${summary}`;
}
1668
/**
 * Embeds the records whose slug is in `slugsToEmbed`, one provider call at a
 * time.
 *
 * @param {Array<{slug: string, title: string, summary: string}>} records
 * @param {Set<string>} slugsToEmbed - Slugs that need a fresh vector.
 * @returns {Promise<object[]>} Store entries (`slug`, `title`, `summary`,
 *   `vector`, `updatedAt`) sharing a single timestamp.
 */
async function embedPages(records, slugsToEmbed) {
  const provider = getProvider();
  const updatedAt = new Date().toISOString();
  const selected = records.filter((record) => slugsToEmbed.has(record.slug));
  const fresh = [];
  for (const record of selected) {
    const { slug, title, summary } = record;
    const vector = await provider.embed(buildEmbeddingText(record));
    fresh.push({ slug, title, summary, vector, updatedAt });
  }
  return fresh;
}
1685
// Model pairs ("stored→active") already warned about, so each is reported once.
var warnedStaleModels = new Set();
/**
 * Warns, once per stored/active model pair, that the embedding store was
 * built with a different model than the active one.
 *
 * @param {string} storedModel - Model recorded in the store.
 * @param {string} activeModel - Currently active embedding model.
 * @returns {void}
 */
function warnStaleEmbeddingStore(storedModel, activeModel) {
  const key = `${storedModel}\u2192${activeModel}`;
  if (!warnedStaleModels.has(key)) {
    warnedStaleModels.add(key);
    const message = `Embedding store was built with "${storedModel}" but active embedding model is "${activeModel}". Falling back to full-index selection. Run 'llmwiki compile' to rebuild embeddings.`;
    status("!", warn(message));
  }
}
1697
/**
 * Determines the embedding model name for the active provider.
 *
 * @returns {string} The trimmed LLMWIKI_EMBEDDING_MODEL override when it is
 *   non-empty and the provider is "openai" or "ollama"; otherwise the
 *   provider's entry in EMBEDDING_MODELS, falling back to the "anthropic" one.
 */
function resolveEmbeddingModel() {
  const providerName = getActiveProviderName();
  const override = process.env.LLMWIKI_EMBEDDING_MODEL?.trim();
  const honoursOverride = providerName === "openai" || providerName === "ollama";
  if (override && honoursOverride) return override;
  return EMBEDDING_MODELS[providerName] ?? EMBEDDING_MODELS.anthropic;
}
1705
/**
 * Merges previously stored entries with freshly embedded ones.
 *
 * @param {Array<{slug: string}>} existing - Entries from the previous store.
 * @param {Array<{slug: string}>} fresh - Newly embedded entries; these win on
 *   slug collisions.
 * @param {Set<string>} liveSlugs - Slugs of pages that still exist; existing
 *   entries outside this set are dropped.
 * @returns {Array<{slug: string}>} One entry per slug.
 */
function mergeEntries(existing, fresh, liveSlugs) {
  const survivors = existing.filter((entry) => liveSlugs.has(entry.slug));
  const bySlug = new Map(
    [...survivors, ...fresh].map((entry) => [entry.slug, entry])
  );
  return [...bySlug.values()];
}
1715
/**
 * Refreshes the embedding store: re-embeds changed pages (or all pages when
 * no store exists or the embedding model changed) and drops entries for pages
 * that no longer exist.
 *
 * @param {string} root - Wiki root directory.
 * @param {string[]} changedSlugs - Slugs of pages whose content changed.
 * @returns {Promise<void>} Resolves without writing when nothing changed.
 */
async function updateEmbeddings(root, changedSlugs) {
  const records = await collectPageRecords(root);
  const liveSlugs = new Set(records.map((record) => record.slug));
  const embeddingModel = resolveEmbeddingModel();
  const existingStore = await readEmbeddingStore(root);
  const modelChanged = Boolean(existingStore && existingStore.model !== embeddingModel);
  const toEmbed = new Set(changedSlugs.filter((slug) => liveSlugs.has(slug)));
  // Vectors from another model are not comparable, so none are carried over.
  const previousEntries = modelChanged ? [] : existingStore?.entries ?? [];
  const needsFullRebuild = !existingStore || modelChanged;
  if (needsFullRebuild) {
    records.forEach((record) => toEmbed.add(record.slug));
  }
  const hasDeadEntries = previousEntries.some((entry) => !liveSlugs.has(entry.slug));
  if (!modelChanged && toEmbed.size === 0 && !hasDeadEntries) {
    return;
  }
  const freshEntries = await embedPages(records, toEmbed);
  const mergedEntries = mergeEntries(previousEntries, freshEntries, liveSlugs);
  const store = {
    version: 1,
    model: embeddingModel,
    dimensions: mergedEntries[0]?.vector.length ?? 0,
    entries: mergedEntries
  };
  await writeEmbeddingStore(root, store);
  status("*", dim(`Embeddings updated (${mergedEntries.length} pages).`));
}
1741
+
1742
+ // src/compiler/candidates.ts
1743
+ import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
1744
+ import { existsSync as existsSync4 } from "fs";
1745
+ import path14 from "path";
1746
+ import { randomBytes } from "crypto";
1747
// Number of random bytes behind the hex suffix of a candidate id.
var ID_SUFFIX_BYTES = 4;
// File extension of stored candidate files.
var CANDIDATE_EXT = ".json";
1749
/**
 * Builds a candidate id from a slug plus a random hex suffix.
 *
 * @param {string} slug - Concept slug.
 * @returns {string} `<slug>-<hex>` where the hex encodes ID_SUFFIX_BYTES
 *   random bytes.
 */
function buildCandidateId(slug) {
  const randomHex = randomBytes(ID_SUFFIX_BYTES).toString("hex");
  return `${slug}-${randomHex}`;
}
1753
/**
 * Path of the pending candidate file for `id`.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {string} `<root>/<CANDIDATES_DIR>/<id><CANDIDATE_EXT>`.
 */
function candidatePath(root, id) {
  const fileName = `${id}${CANDIDATE_EXT}`;
  return path14.join(root, CANDIDATES_DIR, fileName);
}
1756
/**
 * Path of the archived candidate file for `id`.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {string} `<root>/<CANDIDATES_ARCHIVE_DIR>/<id><CANDIDATE_EXT>`.
 */
function archivePath(root, id) {
  const fileName = `${id}${CANDIDATE_EXT}`;
  return path14.join(root, CANDIDATES_ARCHIVE_DIR, fileName);
}
1759
/**
 * Persists a draft page as a review candidate with a fresh id and timestamp.
 *
 * @param {string} root - Wiki root directory.
 * @param {{title: string, slug: string, summary: string, sources: string[],
 *   body: string, sourceStates?: object}} draft - Draft page data.
 * @returns {Promise<object>} The stored candidate, including `id` and
 *   `generatedAt`; `sourceStates` is present only when the draft had it.
 */
async function writeCandidate(root, draft) {
  const { title, slug, summary, sources, body } = draft;
  const candidate = {
    id: buildCandidateId(slug),
    title,
    slug,
    summary,
    sources,
    body,
    generatedAt: new Date().toISOString()
  };
  if (draft.sourceStates) {
    candidate.sourceStates = draft.sourceStates;
  }
  const serialized = JSON.stringify(candidate, null, 2);
  await atomicWrite(candidatePath(root, candidate.id), serialized);
  return candidate;
}
1773
/**
 * Reports an error message and marks the process as failed without exiting.
 *
 * @param {string} message - Message to display.
 * @returns {null} Always null, so callers can `return failWithError(...)`.
 */
function failWithError(message) {
  const rendered = error(message);
  status("!", rendered);
  process.exitCode = 1;
  return null;
}
1778
/**
 * Loads a candidate, reporting a "not found" failure when it is missing or
 * invalid.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The candidate, or null after reporting.
 */
async function loadCandidateOrFail(root, id) {
  const candidate = await readCandidate(root, id);
  return candidate ? candidate : failWithError(`Candidate not found: ${id}`);
}
1783
/**
 * Loads a candidate, reporting that it was removed by another process when it
 * can no longer be read.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The candidate, or null after reporting.
 */
async function loadCandidateUnderLockOrFail(root, id) {
  const candidate = await readCandidate(root, id);
  if (candidate) return candidate;
  return failWithError(`Candidate ${id} was removed by another process during review.`);
}
1790
/**
 * Reads and validates a candidate file.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<object|null>} The parsed candidate, or null when the file
 *   is missing/empty, is not valid JSON, or fails validation.
 */
async function readCandidate(root, id) {
  const raw = await safeReadFile(candidatePath(root, id));
  if (!raw) return null;
  try {
    const parsed = JSON.parse(raw);
    return isValidCandidate(parsed) ? parsed : null;
  } catch {
    // Unparseable candidate files are treated the same as missing ones.
    return null;
  }
}
1801
/**
 * Structural check for a parsed candidate file.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {boolean} True when `id`, `title`, `slug`, `body` and
 *   `generatedAt` are strings and `sources` is an array.
 */
function isValidCandidate(value) {
  if (!value || typeof value !== "object") return false;
  const candidate = value;
  const requiredStrings = ["id", "title", "slug", "body", "generatedAt"];
  // `generatedAt` is required too: listCandidates sorts with
  // generatedAt.localeCompare, which throws when the field is missing.
  return requiredStrings.every((field) => typeof candidate[field] === "string") && Array.isArray(candidate.sources);
}
1806
/**
 * Lists all valid pending candidates, oldest first.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<object[]>} Candidates sorted by `generatedAt`; empty when
 *   the candidates directory does not exist.
 */
async function listCandidates(root) {
  const dir = path14.join(root, CANDIDATES_DIR);
  if (!existsSync4(dir)) return [];
  const entries = await readdir6(dir, { withFileTypes: true });
  const ids = entries
    .filter((entry) => entry.isFile() && entry.name.endsWith(CANDIDATE_EXT))
    .map((entry) => entry.name.slice(0, -CANDIDATE_EXT.length));
  const candidates = [];
  for (const id of ids) {
    const candidate = await readCandidate(root, id);
    if (candidate) candidates.push(candidate);
  }
  return candidates.sort((a, b) => a.generatedAt.localeCompare(b.generatedAt));
}
1820
/**
 * Counts the valid pending candidates.
 *
 * @param {string} root - Wiki root directory.
 * @returns {Promise<number>} Number of candidates returned by listCandidates.
 */
async function countCandidates(root) {
  return (await listCandidates(root)).length;
}
1824
/**
 * Deletes a pending candidate file.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<boolean>} True when the file existed and was removed,
 *   false when there was nothing to delete.
 */
async function deleteCandidate(root, id) {
  const target = candidatePath(root, id);
  const present = existsSync4(target);
  if (present) {
    await unlink2(target);
  }
  return present;
}
1830
/**
 * Moves a pending candidate file into the archive directory.
 *
 * @param {string} root - Wiki root directory.
 * @param {string} id - Candidate id.
 * @returns {Promise<boolean>} True when the candidate was archived, false
 *   when no pending file exists for `id`.
 */
async function archiveCandidate(root, id) {
  const from = candidatePath(root, id);
  if (!existsSync4(from)) return false;
  const to = archivePath(root, id);
  await mkdir5(path14.dirname(to), { recursive: true });
  try {
    await rename3(from, to);
  } catch {
    // Rename failed for some reason: fall back to copying the contents to the
    // archive location and then removing the original.
    // NOTE(review): safeReadFile yields "" when the read fails, so this path
    // could archive an empty file - confirm that is acceptable.
    const contents = await safeReadFile(from);
    await writeFile4(to, contents, "utf-8");
    await unlink2(from);
  }
  return true;
}
1844
+
1845
+ // src/compiler/page-renderer.ts
1846
+ import { readdir as readdir7 } from "fs/promises";
1847
+ import path15 from "path";
1848
+
1849
+ // src/compiler/provenance.ts
1850
/**
 * Copies the provenance fields that are present on a concept into a
 * frontmatter field object.
 *
 * @param {object} fields - Frontmatter fields, mutated in place.
 * @param {{confidence?: number, provenanceState?: string,
 *   contradictedBy?: object[], inferredParagraphs?: number}} concept
 * @returns {void}
 */
function addProvenanceMeta(fields, concept) {
  const { confidence, provenanceState, contradictedBy, inferredParagraphs } = concept;
  // typeof checks (rather than truthiness) keep legitimate zero values.
  if (typeof confidence === "number") fields.confidence = confidence;
  if (provenanceState) fields.provenanceState = provenanceState;
  if (contradictedBy?.length > 0) fields.contradictedBy = contradictedBy;
  if (typeof inferredParagraphs === "number") fields.inferredParagraphs = inferredParagraphs;
}
1864
/**
 * Emits a warning listing the slugs that contradict a concept, if any.
 *
 * @param {string} conceptTitle - Title shown in the warning.
 * @param {{contradictedBy?: Array<{slug: string}>}} concept
 * @returns {void}
 */
function reportContradictionWarnings(conceptTitle, concept) {
  const conflicts = concept.contradictedBy || [];
  if (conflicts.length === 0) return;
  const slugList = conflicts.map(({ slug }) => slug).join(", ");
  const message = `Contradiction reported on "${conceptTitle}" \u2014 conflicts with: ${slugList}`;
  status("!", warn(message));
}
1873
+
1874
+ // src/compiler/page-renderer.ts
1875
// Maximum number of other concept pages read as related-page context.
var RELATED_PAGE_CONTEXT_LIMIT = 5;
1876
/**
 * Generates the full markdown (frontmatter + body) for a merged concept page
 * by prompting the model with the combined source content, the existing page
 * and related pages.
 *
 * @param {string} root - Wiki root directory.
 * @param {{slug: string, concept: object, combinedContent: string,
 *   sourceFiles: string[]}} entry - Merged concept entry.
 * @returns {Promise<string>} Page text: frontmatter, blank line, body, newline.
 */
async function renderMergedPageContent(root, entry) {
  const title = entry.concept.concept;
  const existingPage = await safeReadFile(path15.join(root, CONCEPTS_DIR, `${entry.slug}.md`));
  const relatedPages = await loadRelatedPages(root, entry.slug);
  const system = buildPagePrompt(title, entry.combinedContent, existingPage, relatedPages);
  const userMessage = { role: "user", content: `Write the wiki page for "${title}".` };
  const pageBody = await callClaude({ system, messages: [userMessage] });
  const frontmatter = buildMergedFrontmatter(entry, existingPage);
  reportContradictionWarnings(title, entry.concept);
  return `${frontmatter}\n\n${pageBody}\n`;
}
1899
/**
 * Build the frontmatter block for a merged concept page.
 *
 * `createdAt` is carried over from the existing page when that page's
 * frontmatter holds a non-empty string there; otherwise both timestamps are
 * the current time.
 *
 * @param {object} entry - Merged entry; reads `concept` and `sourceFiles`.
 * @param {string} existingPage - Current page text, or an empty value when
 *   there is none.
 * @returns {*} Whatever `buildFrontmatter` returns for the assembled fields.
 */
function buildMergedFrontmatter(entry, existingPage) {
  const { concept, sourceFiles } = entry;
  const now = new Date().toISOString();
  const parsed = existingPage ? parseFrontmatter(existingPage) : null;
  const priorCreatedAt = parsed?.meta.createdAt;
  const keepPrior = Boolean(priorCreatedAt) && typeof priorCreatedAt === "string";
  const fields = {
    title: concept.concept,
    summary: concept.summary,
    sources: sourceFiles,
    createdAt: keepPrior ? priorCreatedAt : now,
    updatedAt: now
  };
  addObsidianMeta(fields, concept.concept, concept.tags ?? []);
  addProvenanceMeta(fields, concept);
  return buildFrontmatter(fields);
}
1914
/**
 * Load a handful of other concept pages to give the model cross-reference
 * context.
 *
 * Takes the first RELATED_PAGE_CONTEXT_LIMIT `.md` entries of the concepts
 * directory (excluding the page being written), then drops unreadable/empty
 * and orphaned ones. The limit is applied before that filtering, so fewer
 * pages than the limit may be returned.
 *
 * @param {string} root - Project root.
 * @param {string} excludeSlug - Slug of the page being generated.
 * @returns {Promise<string>} Page texts joined by a `---` separator, or ""
 *   when the directory cannot be listed.
 */
async function loadRelatedPages(root, excludeSlug) {
  const conceptsPath = path15.join(root, CONCEPTS_DIR);
  const ownFile = `${excludeSlug}.md`;
  let names;
  try {
    names = await readdir7(conceptsPath);
  } catch {
    return "";
  }
  const picked = names
    .filter((name) => name.endsWith(".md") && name !== ownFile)
    .slice(0, RELATED_PAGE_CONTEXT_LIMIT);
  const sections = [];
  for (const name of picked) {
    const text = await safeReadFile(path15.join(conceptsPath, name));
    const usable = Boolean(text) && !parseFrontmatter(text).meta.orphaned;
    if (usable) sections.push(text);
  }
  return sections.join("\n\n---\n\n");
}
1933
+
936
1934
  // src/compiler/index.ts
937
1935
  import pLimit from "p-limit";
938
- async function compile(root) {
1936
/**
 * Create a compile result with zero counters and fresh, empty lists.
 *
 * @returns {{compiled: number, skipped: number, deleted: number,
 *   concepts: Array, pages: Array, errors: Array}}
 */
function emptyCompileResult() {
  return {
    compiled: 0,
    skipped: 0,
    deleted: 0,
    concepts: [],
    pages: [],
    errors: []
  };
}
1939
/**
 * Run a compile and discard its result object; resolves to `undefined`.
 *
 * @param {string} root - Project root.
 * @param {object} [options] - Forwarded unchanged to `compileAndReport`.
 * @returns {Promise<void>}
 */
async function compile(root, options = {}) {
  // The result is intentionally dropped; use compileAndReport to get it.
  await compileAndReport(root, options);
}
1942
/**
 * Run the compile pipeline under the project lock and return its result.
 *
 * When the lock cannot be acquired no work is done and an otherwise empty
 * result carrying a single error message is returned. The lock is released
 * whether the pipeline resolves or throws.
 *
 * @param {string} root - Project root.
 * @param {object} [options] - Forwarded to `runCompilePipeline`.
 * @returns {Promise<object>} Compile result.
 */
async function compileAndReport(root, options = {}) {
  header("llmwiki compile");
  if (!(await acquireLock(root))) {
    status("!", error("Could not acquire lock. Try again later."));
    const lockFailure = emptyCompileResult();
    lockFailure.errors = ["Could not acquire .llmwiki/lock \u2014 another compile is in progress."];
    return lockFailure;
  }
  try {
    return await runCompilePipeline(root, options);
  } finally {
    await releaseLock(root);
  }
}
951
- async function runCompilePipeline(root) {
1958
/**
 * Split change records into three lists by their `status`.
 *
 * "new" and "changed" go to `toCompile`, "deleted" to `deleted`,
 * "unchanged" to `unchanged`; any other status is left out. Input order is
 * preserved within each list.
 *
 * @param {Array<{status: string}>} changes - Change records.
 * @returns {{toCompile: Array, deleted: Array, unchanged: Array}}
 */
function bucketChanges(changes) {
  const buckets = { toCompile: [], deleted: [], unchanged: [] };
  for (const change of changes) {
    switch (change.status) {
      case "new":
      case "changed":
        buckets.toCompile.push(change);
        break;
      case "deleted":
        buckets.deleted.push(change);
        break;
      case "unchanged":
        buckets.unchanged.push(change);
        break;
      default:
        break;
    }
  }
  return buckets;
}
1965
/**
 * Merge the extractions per concept and generate one page (or one review
 * candidate) per merged entry, at most COMPILE_CONCURRENCY at a time.
 *
 * Source states are only gathered in review mode. Every merged entry is
 * returned in `pages`, including entries whose generation reported an error.
 *
 * @param {string} root - Project root.
 * @param {Array} extractions - Per-source extraction results.
 * @param {Set<string>} frozenSlugs - Slugs excluded from merging.
 * @param {object} options - Compile options; reads `review`.
 * @returns {Promise<{pages: Array, errors: Array, candidates: Array}>}
 */
async function generatePagesPhase(root, extractions, frozenSlugs, options) {
  const merged = mergeExtractions(extractions, frozenSlugs);
  let sourceStates = {};
  if (options.review) {
    sourceStates = await buildExtractionSourceStates(root, extractions);
  }
  const limit = pLimit(COMPILE_CONCURRENCY);
  const errors = [];
  const candidates = [];
  const generateOne = async (entry) => {
    const outcome = await generateMergedPage(root, entry, options, sourceStates);
    if (outcome.error) errors.push(outcome.error);
    if (outcome.candidateId) candidates.push(outcome.candidateId);
    return entry;
  };
  const tasks = merged.map((entry) => limit(() => generateOne(entry)));
  const pages = await Promise.all(tasks);
  return { pages, errors, candidates };
}
1981
/**
 * Persist source state for every extraction that produced at least one
 * concept, one source at a time and in input order.
 *
 * @param {string} root - Project root.
 * @param {Array} extractions - Per-source extraction results.
 * @returns {Promise<void>}
 */
async function persistExtractionStates(root, extractions) {
  for (const extraction of extractions) {
    const { concepts, sourcePath, sourceFile } = extraction;
    if (concepts.length !== 0) {
      await persistSourceState(root, sourcePath, sourceFile, concepts);
    }
  }
}
1987
/**
 * Print the end-of-compile summary and assemble the result object.
 *
 * The error list is the generation errors followed by one "No concepts
 * extracted" entry per extraction with an empty concept list. `candidates`
 * is attached to the result only in review mode.
 *
 * @param {{toCompile: Array, deleted: Array, unchanged: Array}} buckets
 * @param {{pages: Array, errors: Array, candidates: Array}} generation
 * @param {Array} extractions - Per-source extraction results.
 * @param {object} options - Compile options; reads `review`.
 * @returns {object} Compile result.
 */
function summarizeCompile(buckets, generation, extractions, options) {
  const compiled = buckets.toCompile.length;
  const skipped = buckets.unchanged.length;
  const deleted = buckets.deleted.length;

  header("Compilation complete");
  status("\u2713", success(`${compiled} compiled, ${skipped} skipped, ${deleted} deleted`));
  if (options.review && generation.candidates.length > 0) {
    status("?", info(
      `${generation.candidates.length} candidate(s) awaiting review \u2014 run \`llmwiki review list\``
    ));
  } else if (compiled > 0) {
    status("\u2192", dim('Next: llmwiki query "your question here"'));
  }

  const emptyExtractionErrors = extractions
    .filter((result) => result.concepts.length === 0)
    .map((result) => `No concepts extracted from ${result.sourceFile}`);

  const summary = {
    compiled,
    skipped,
    deleted,
    concepts: generation.pages.map((entry) => entry.concept.concept),
    pages: generation.pages.map((entry) => entry.slug),
    errors: [...generation.errors, ...emptyExtractionErrors]
  };
  if (options.review) {
    summary.candidates = generation.candidates;
  }
  return summary;
}
2018
/**
 * Execute one compile pass: detect changes, extract concepts, generate
 * pages, and (outside review mode) persist state and refresh the wiki.
 *
 * In review mode every step that mutates the wiki or the stored state is
 * skipped: orphan marking, freezing failed extractions, state persistence
 * and finalization. Page generation still runs and is handed the same
 * options.
 *
 * @param {string} root - Project root.
 * @param {object} options - Compile options; reads `review`.
 * @returns {Promise<object>} Compile result.
 */
async function runCompilePipeline(root, options) {
  const state = await readState(root);
  const changes = await detectChanges(root, state);
  augmentWithAffectedSources(changes, findAffectedSources(state, changes));
  const buckets = bucketChanges(changes);

  const nothingToDo = buckets.toCompile.length === 0 && buckets.deleted.length === 0;
  if (nothingToDo) {
    status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
    return { ...emptyCompileResult(), skipped: buckets.unchanged.length };
  }
  printChangesSummary(changes);

  const applyToWiki = !options.review;
  if (applyToWiki) {
    await markDeletedAsOrphaned(root, buckets.deleted, state);
  }
  const frozenSlugs = findFrozenSlugs(state, changes);
  reportFrozenSlugs(frozenSlugs);

  const extractions = await runExtractionPhases(root, buckets.toCompile, state, changes);
  if (applyToWiki) {
    await freezeFailedExtractions(root, extractions, frozenSlugs);
  }

  const generation = await generatePagesPhase(root, extractions, frozenSlugs, options);
  if (applyToWiki) {
    await persistExtractionStates(root, extractions);
    if (frozenSlugs.size > 0) {
      await orphanUnownedFrozenPages(root, frozenSlugs);
    }
    await persistFrozenSlugs(root, frozenSlugs, extractions);
    await finalizeWiki(root, generation.pages);
  }
  return summarizeCompile(buckets, generation, extractions, options);
}
2048
/**
 * Append a "changed" record to `changes` for every affected source file,
 * logging each one. `changes` is mutated in place.
 *
 * @param {Array} changes - Change list to extend.
 * @param {Iterable<string>} affected - Source files to add.
 * @returns {void}
 */
function augmentWithAffectedSources(changes, affected) {
  for (const file of affected) {
    const notice = `${file} [affected by shared concept]`;
    status("~", info(notice));
    changes.push({ file, status: "changed" });
  }
}
2054
/**
 * Call `markOrphaned` for each deleted source, sequentially and in order.
 *
 * @param {string} root - Project root.
 * @param {Array<{file: string}>} deleted - Change records of deleted sources.
 * @param {object} state - Compile state forwarded to `markOrphaned`.
 * @returns {Promise<void>}
 */
async function markDeletedAsOrphaned(root, deleted, state) {
  for (const { file } of deleted) {
    await markOrphaned(root, file, state);
  }
}
2059
/**
 * Log one informational line per frozen slug.
 *
 * @param {Iterable<string>} frozenSlugs - Slugs to report.
 * @returns {void}
 */
function reportFrozenSlugs(frozenSlugs) {
  for (const slug of frozenSlugs) {
    const line = `Frozen: ${slug} (shared with deleted source)`;
    status("i", dim(line));
  }
}
2064
/**
 * Extract concepts in two passes: first every source scheduled for
 * compilation, then every source that `findLateAffectedSources` reports
 * once the first pass is known. Sources are processed one at a time.
 *
 * @param {string} root - Project root.
 * @param {Array<{file: string}>} toCompile - Change records to extract.
 * @param {object} state - Compile state.
 * @param {Array} allChanges - Full change list.
 * @returns {Promise<Array>} Extraction results, first pass followed by the
 *   late-affected pass.
 */
async function runExtractionPhases(root, toCompile, state, allChanges) {
  const extractions = [];
  for (const { file } of toCompile) {
    const extraction = await extractForSource(root, file);
    extractions.push(extraction);
  }
  const lateAffected = findLateAffectedSources(extractions, state, allChanges);
  for (const file of lateAffected) {
    status("~", info(`${file} [shares concept with new source]`));
    const extraction = await extractForSource(root, file);
    extractions.push(extraction);
  }
  return extractions;
}
2076
/**
 * Post-generation housekeeping: resolve interlinks for the generated pages
 * (when there are any), regenerate the index and the MOC, then refresh
 * embeddings for the changed slugs.
 *
 * @param {string} root - Project root.
 * @param {Array} pages - Generated entries; reads `slug` and
 *   `concept.is_new`.
 * @returns {Promise<void>}
 */
async function finalizeWiki(root, pages) {
  const allChangedSlugs = [];
  const allNewSlugs = [];
  for (const entry of pages) {
    allChangedSlugs.push(entry.slug);
    if (entry.concept.is_new) allNewSlugs.push(entry.slug);
  }
  if (allChangedSlugs.length > 0) {
    status("\u{1F517}", info("Resolving interlinks..."));
    await resolveLinks(root, allChangedSlugs, allNewSlugs);
  }
  await generateIndex(root);
  await generateMOC(root);
  await safelyUpdateEmbeddings(root, allChangedSlugs);
}
1015
2087
  function printChangesSummary(changes) {
1016
2088
  const iconMap = {
@@ -1033,9 +2105,9 @@ function printChangesSummary(changes) {
1033
2105
  }
1034
2106
  async function extractForSource(root, sourceFile) {
1035
2107
  status("*", info(`Extracting: ${sourceFile}`));
1036
- const sourcePath = path10.join(root, SOURCES_DIR, sourceFile);
1037
- const sourceContent = await readFile7(sourcePath, "utf-8");
1038
- const existingIndex = await safeReadFile(path10.join(root, INDEX_FILE));
2108
+ const sourcePath = path16.join(root, SOURCES_DIR, sourceFile);
2109
+ const sourceContent = await readFile8(sourcePath, "utf-8");
2110
+ const existingIndex = await safeReadFile(path16.join(root, INDEX_FILE));
1039
2111
  const concepts = await extractConcepts(sourceContent, existingIndex);
1040
2112
  if (concepts.length > 0) {
1041
2113
  const names = concepts.map((c) => c.concept).join(", ");
@@ -1043,6 +2115,26 @@ async function extractForSource(root, sourceFile) {
1043
2115
  }
1044
2116
  return { sourceFile, sourcePath, sourceContent, concepts };
1045
2117
  }
2118
/**
 * Combine the provenance metadata of two records for the same concept.
 *
 * Starts from a shallow copy of `existing`, then:
 * - `confidence`: the lower of the two when both are numbers, otherwise the
 *   incoming number (left alone when incoming has none);
 * - `provenanceState`: always "merged";
 * - `contradictedBy`: existing refs followed by incoming refs whose slug is
 *   not already present, or `undefined` when the combined list is empty;
 * - `inferredParagraphs`: the higher of the two when both are numbers,
 *   otherwise the incoming number.
 *
 * @param {object} existing - Record already held for the slug.
 * @param {object} incoming - Newly extracted record.
 * @returns {object} New record; neither argument is mutated.
 */
function reconcileConceptMetadata(existing, incoming) {
  const reconciled = { ...existing };
  const isNumber = (value) => typeof value === "number";

  if (isNumber(incoming.confidence)) {
    reconciled.confidence = isNumber(existing.confidence)
      ? Math.min(existing.confidence, incoming.confidence)
      : incoming.confidence;
  }
  reconciled.provenanceState = "merged";

  const refs = Array.from(existing.contradictedBy ?? []);
  const knownSlugs = new Set(refs.map(({ slug }) => slug));
  (incoming.contradictedBy ?? []).forEach((ref) => {
    if (knownSlugs.has(ref.slug)) return;
    knownSlugs.add(ref.slug);
    refs.push(ref);
  });
  reconciled.contradictedBy = refs.length > 0 ? refs : void 0;

  if (isNumber(incoming.inferredParagraphs)) {
    reconciled.inferredParagraphs = isNumber(existing.inferredParagraphs)
      ? Math.max(existing.inferredParagraphs, incoming.inferredParagraphs)
      : incoming.inferredParagraphs;
  }
  return reconciled;
}
1046
2138
  function mergeExtractions(extractions, frozenSlugs) {
1047
2139
  const bySlug = /* @__PURE__ */ new Map();
1048
2140
  for (const result of extractions) {
@@ -1052,6 +2144,7 @@ function mergeExtractions(extractions, frozenSlugs) {
1052
2144
  if (frozenSlugs.has(slug)) continue;
1053
2145
  const existing = bySlug.get(slug);
1054
2146
  if (existing) {
2147
+ existing.concept = reconcileConceptMetadata(existing.concept, concept);
1055
2148
  existing.sourceFiles.push(result.sourceFile);
1056
2149
  existing.combinedContent += `
1057
2150
 
@@ -1072,37 +2165,26 @@ ${result.sourceContent}`
1072
2165
  }
1073
2166
  return Array.from(bySlug.values());
1074
2167
  }
1075
- async function generateMergedPage(root, entry) {
1076
- const pagePath = path10.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
1077
- const existingPage = await safeReadFile(pagePath);
1078
- const relatedPages = await loadRelatedPages(root, entry.slug);
1079
- const system = buildPagePrompt(
1080
- entry.concept.concept,
1081
- entry.combinedContent,
1082
- existingPage,
1083
- relatedPages
1084
- );
1085
- const pageBody = await callClaude({
1086
- system,
1087
- messages: [
1088
- { role: "user", content: `Write the wiki page for "${entry.concept.concept}".` }
1089
- ]
1090
- });
1091
- const now = (/* @__PURE__ */ new Date()).toISOString();
1092
- const existing = existingPage ? parseFrontmatter(existingPage) : null;
1093
- const createdAt = existing?.meta.createdAt && typeof existing.meta.createdAt === "string" ? existing.meta.createdAt : now;
1094
- const frontmatter = buildFrontmatter({
2168
/**
 * Render a merged concept page and either queue it as a review candidate
 * (review mode) or write it to the concepts directory.
 *
 * @param {string} root - Project root.
 * @param {object} entry - Merged entry; reads `slug` and `concept.concept`.
 * @param {object} options - Compile options; reads `review`.
 * @param {object} sourceStates - Source states forwarded to the review path.
 * @returns {Promise<object>} `{ candidateId }` in review mode, otherwise
 *   `{ error }` where `error` is `undefined` on success.
 */
async function generateMergedPage(root, entry, options, sourceStates) {
  const fullPage = await renderMergedPageContent(root, entry);
  if (!options.review) {
    const pagePath = path16.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
    const failure = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
    // writePageIfValid reports success as null; callers expect undefined.
    return { error: failure ?? void 0 };
  }
  return await persistReviewCandidate(root, entry, fullPage, sourceStates);
}
2177
/**
 * Store a rendered page as a review candidate instead of writing it to the
 * wiki, and log the new candidate's id.
 *
 * @param {string} root - Project root.
 * @param {object} entry - Merged entry; reads `slug`, `sourceFiles`,
 *   `concept.concept` and `concept.summary`.
 * @param {string} fullPage - Rendered page text (frontmatter + body).
 * @param {object} sourceStates - States filtered down to this entry's
 *   sources via `pickStatesForSources`.
 * @returns {Promise<{candidateId: *}>} Id of the written candidate.
 */
async function persistReviewCandidate(root, entry, fullPage, sourceStates) {
  const { slug, sourceFiles, concept } = entry;
  const payload = {
    title: concept.concept,
    slug,
    summary: concept.summary,
    sources: sourceFiles,
    body: fullPage,
    sourceStates: pickStatesForSources(sourceStates, sourceFiles)
  };
  const candidate = await writeCandidate(root, payload);
  status("?", info(`Candidate ready: ${candidate.id} (${slug})`));
  return { candidateId: candidate.id };
}
1107
2189
  async function extractConcepts(sourceContent, existingIndex) {
1108
2190
  const system = buildExtractionPrompt(sourceContent, existingIndex);
@@ -1113,31 +2195,21 @@ async function extractConcepts(sourceContent, existingIndex) {
1113
2195
  });
1114
2196
  return parseConcepts(rawOutput);
1115
2197
  }
1116
- async function loadRelatedPages(root, excludeSlug) {
1117
- const conceptsPath = path10.join(root, CONCEPTS_DIR);
1118
- let files;
1119
- try {
1120
- files = await readdir4(conceptsPath);
1121
- } catch {
1122
- return "";
1123
- }
1124
- const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, 5);
1125
- const contents = [];
1126
- for (const f of related) {
1127
- const content = await safeReadFile(path10.join(conceptsPath, f));
1128
- if (!content) continue;
1129
- const { meta } = parseFrontmatter(content);
1130
- if (meta.orphaned) continue;
1131
- contents.push(content);
1132
- }
1133
- return contents.join("\n\n---\n\n");
1134
- }
1135
2198
/**
 * Write a page atomically when it passes `validateWikiPage`; otherwise warn
 * and leave the file system untouched.
 *
 * @param {string} pagePath - Destination file.
 * @param {string} content - Full page text.
 * @param {string} conceptTitle - Title used in the messages.
 * @returns {Promise<string|null>} `null` after a successful write, or an
 *   error message when validation failed.
 */
async function writePageIfValid(pagePath, content, conceptTitle) {
  if (validateWikiPage(content)) {
    await atomicWrite(pagePath, content);
    return null;
  }
  status("!", warn(`Invalid page for "${conceptTitle}" \u2014 skipped.`));
  return `Invalid page for "${conceptTitle}" \u2014 failed validation`;
}
2206
/**
 * Best-effort embeddings refresh: a failure is downgraded to a warning so
 * it never fails the surrounding operation.
 *
 * @param {string} root - Project root.
 * @param {string[]} changedSlugs - Slugs whose embeddings should be updated.
 * @returns {Promise<void>}
 */
async function safelyUpdateEmbeddings(root, changedSlugs) {
  try {
    await updateEmbeddings(root, changedSlugs);
  } catch (err) {
    let message;
    if (err instanceof Error) {
      message = err.message;
    } else {
      message = String(err);
    }
    status("!", warn(`Skipped embeddings update: ${message}`));
  }
}
1142
2214
  async function persistSourceState(root, sourcePath, sourceFile, concepts) {
1143
2215
  const hash = await hashFile(sourcePath);
@@ -1150,20 +2222,20 @@ async function persistSourceState(root, sourcePath, sourceFile, concepts) {
1150
2222
  }
1151
2223
 
1152
2224
  // src/commands/compile.ts
1153
- async function compileCommand() {
1154
- if (!existsSync3(SOURCES_DIR)) {
2225
/**
 * CLI entry point for `compile`: warns and returns when SOURCES_DIR does
 * not exist, otherwise compiles the current working directory.
 *
 * @param {object} [options] - Forwarded unchanged to `compile`.
 * @returns {Promise<void>}
 */
async function compileCommand(options = {}) {
  const hasSources = existsSync5(SOURCES_DIR);
  if (hasSources) {
    await compile(process.cwd(), options);
    return;
  }
  status("!", warn("No sources found. Run `llmwiki ingest <url>` first."));
}
1163
2235
 
1164
2236
  // src/commands/query.ts
1165
- import { existsSync as existsSync4 } from "fs";
1166
- import path11 from "path";
2237
+ import { existsSync as existsSync6 } from "fs";
2238
+ import path17 from "path";
1167
2239
  var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
1168
2240
  var PAGE_SELECTION_TOOL = {
1169
2241
  name: "select_pages",
@@ -1208,12 +2280,35 @@ ${indexContent}`;
1208
2280
  return { pages: [], reasoning: "Failed to parse page selection response" };
1209
2281
  }
1210
2282
  }
2283
/**
 * Render candidate pages as a markdown bullet list, one line per page:
 * bold slug, then title, an em dash, and the summary.
 *
 * @param {Array<{slug: string, title: string, summary: string}>} candidates
 * @returns {string} Newline-joined list ("" for no candidates).
 */
function buildFilteredIndex(candidates) {
  const lines = [];
  for (const { slug, title, summary } of candidates) {
    lines.push(`- **${slug}**: ${title} \u2014 ${summary}`);
  }
  return lines.join("\n");
}
2286
/**
 * Choose the wiki pages to answer a question from.
 *
 * When the semantic pre-filter yields candidates, the model picks from an
 * index of just those candidates; otherwise it picks from the full index
 * file. `rawPages` is always the model's selection verbatim; `pages` is the
 * selection normalized to slugs.
 *
 * @param {string} root - Project root.
 * @param {string} question - User question.
 * @returns {Promise<{pages: string[], rawPages: string[], reasoning: string}>}
 */
async function selectRelevantPages(root, question) {
  const candidates = await tryFindRelevantPages(root, question);
  if (candidates.length > 0) {
    const filteredIndex = buildFilteredIndex(candidates);
    const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
    // The selection is model output and later becomes a file name, so it gets
    // the same normalization as the full-index path below. Names that exactly
    // match a candidate slug are kept as-is; anything else (a title, odd
    // casing, path characters) is slugified.
    const knownSlugs = new Set(candidates.map((entry) => entry.slug));
    const pages = rawPages2.map((p) => (knownSlugs.has(p) ? p : slugify(p)));
    return { pages, rawPages: rawPages2, reasoning: reasoning2 };
  }
  const indexContent = await safeReadFile(path17.join(root, INDEX_FILE));
  const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
  return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning };
}
2297
/**
 * Run the semantic pre-filter, treating any failure as "no candidates" so
 * the caller can fall back to the full index. The failure reason is logged.
 *
 * @param {string} root - Project root.
 * @param {string} question - User question.
 * @returns {Promise<Array>} Candidate pages, or [] when the lookup threw.
 */
async function tryFindRelevantPages(root, question) {
  let found = [];
  try {
    found = await findRelevantPages(root, question);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    status("!", dim(`Semantic pre-filter unavailable (${message}); using full index.`));
    return [];
  }
  return found;
}
1211
2306
  async function loadSelectedPages(root, slugs) {
1212
2307
  const sections = [];
1213
2308
  for (const slug of slugs) {
1214
2309
  let content = "";
1215
2310
  for (const dir of PAGE_DIRS) {
1216
- const candidate = await safeReadFile(path11.join(root, dir, `${slug}.md`));
2311
+ const candidate = await safeReadFile(path17.join(root, dir, `${slug}.md`));
1217
2312
  if (!candidate) continue;
1218
2313
  const { meta } = parseFrontmatter(candidate);
1219
2314
  if (meta.orphaned) continue;
@@ -1229,20 +2324,18 @@ ${content}`);
1229
2324
  }
1230
2325
  return sections.join("\n\n");
1231
2326
  }
1232
- async function streamAnswer(question, pagesContent) {
1233
- const systemPrompt = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
2327
+ var ANSWER_SYSTEM_PROMPT = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
2328
/**
 * Ask the model to answer a question from the supplied wiki pages.
 * Streaming is enabled exactly when an `onToken` callback is given.
 *
 * @param {string} question - User question.
 * @param {string} pagesContent - Text of the selected pages.
 * @param {Function} [onToken] - Optional per-token callback.
 * @returns {Promise<*>} Whatever `callClaude` resolves to.
 */
async function callAnswerLLM(question, pagesContent, onToken) {
  const userMessage = `Question: ${question}\n\nRelevant wiki pages:\n${pagesContent}`;
  const request = {
    system: ANSWER_SYSTEM_PROMPT,
    messages: [{ role: "user", content: userMessage }],
    stream: !!onToken,
    onToken
  };
  return callClaude(request);
}
1247
2340
  function summarizeAnswer(answer) {
1248
2341
  const firstLine = answer.trim().split(/\n/)[0] ?? "";
@@ -1251,7 +2344,7 @@ function summarizeAnswer(answer) {
1251
2344
  }
1252
2345
  async function saveQueryPage(root, question, answer) {
1253
2346
  const slug = slugify(question);
1254
- const filePath = path11.join(root, QUERIES_DIR, `${slug}.md`);
2347
+ const filePath = path17.join(root, QUERIES_DIR, `${slug}.md`);
1255
2348
  const frontmatter = buildFrontmatter({
1256
2349
  title: question,
1257
2350
  summary: summarizeAnswer(answer),
@@ -1268,27 +2361,52 @@ ${answer}
1268
2361
  success(`Saved query \u2192 ${source(filePath)}`)
1269
2362
  );
1270
2363
  await generateIndex(root);
2364
+ try {
2365
+ await updateEmbeddings(root, [slug]);
2366
+ } catch (err) {
2367
+ const message = err instanceof Error ? err.message : String(err);
2368
+ status("!", warn(`Skipped embeddings update: ${message}`));
2369
+ }
2370
+ return slug;
2371
+ }
2372
/**
 * Answer a question from the wiki: select pages, load them, query the
 * model, and optionally save the answer as a query page.
 *
 * @param {string} root - Project root.
 * @param {string} question - User question.
 * @param {object} [options] - Reads `save`, `onToken` and
 *   `onPageSelection` (called with the selected pages and the reasoning).
 * @returns {Promise<object>} `{ answer, selectedPages, reasoning, saved }`;
 *   when none of the selected pages yields content, `answer` is "" and
 *   `saved` is absent.
 * @throws {Error} When the wiki index file does not exist.
 */
async function generateAnswer(root, question, options = {}) {
  const indexPath = path17.join(root, INDEX_FILE);
  if (!existsSync6(indexPath)) {
    throw new Error("Wiki index not found. Run `llmwiki compile` first.");
  }
  const selection = await selectRelevantPages(root, question);
  const selectedPages = selection.pages;
  const reasoning = selection.reasoning;
  options.onPageSelection?.(selectedPages, reasoning);

  const pagesContent = await loadSelectedPages(root, selectedPages);
  if (!pagesContent) {
    return { answer: "", selectedPages, reasoning };
  }
  const answer = await callAnswerLLM(question, pagesContent, options.onToken);
  const saved = options.save ? await saveQueryPage(root, question, answer) : void 0;
  return { answer, selectedPages, reasoning, saved };
}
1272
2389
  async function queryCommand(root, question, options) {
1273
- if (!existsSync4(path11.join(root, INDEX_FILE))) {
2390
+ if (!existsSync6(path17.join(root, INDEX_FILE))) {
1274
2391
  status("!", error("Wiki index not found. Run `llmwiki compile` first."));
1275
2392
  return;
1276
2393
  }
1277
2394
  header("Selecting relevant pages");
1278
- const indexContent = await safeReadFile(path11.join(root, INDEX_FILE));
1279
- const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
1280
- const pages = rawPages.map((p) => slugify(p));
1281
- status("i", dim(`Reasoning: ${reasoning}`));
1282
- status("*", info(`Selected ${pages.length} page(s): ${rawPages.join(", ")}`));
1283
- header("Generating answer");
1284
- const pagesContent = await loadSelectedPages(root, pages);
1285
- if (!pagesContent) {
2395
+ const result = await generateAnswer(root, question, {
2396
+ save: options.save,
2397
+ onToken: (text) => process.stdout.write(text),
2398
+ onPageSelection: (pages, reasoning) => {
2399
+ status("i", dim(`Reasoning: ${reasoning}`));
2400
+ status("*", info(`Selected ${pages.length} page(s): ${pages.join(", ")}`));
2401
+ header("Generating answer");
2402
+ }
2403
+ });
2404
+ process.stdout.write("\n");
2405
+ if (!result.answer) {
1286
2406
  status("!", error("No matching pages found. Try refining your question."));
1287
2407
  return;
1288
2408
  }
1289
- const answer = await streamAnswer(question, pagesContent);
1290
- if (options.save) {
1291
- await saveQueryPage(root, question, answer);
2409
+ if (result.saved) {
1292
2410
  status("\u2192", dim("Saved. Future queries will use this answer as context."));
1293
2411
  } else {
1294
2412
  status("\u2192", dim("Tip: use --save to add this answer to your wiki"));
@@ -1297,12 +2415,12 @@ async function queryCommand(root, question, options) {
1297
2415
 
1298
2416
  // src/commands/watch.ts
1299
2417
  import { watch as chokidarWatch } from "chokidar";
1300
- import { existsSync as existsSync5 } from "fs";
1301
- import path12 from "path";
2418
+ import { existsSync as existsSync7 } from "fs";
2419
+ import path18 from "path";
1302
2420
  var DEBOUNCE_MS = 500;
1303
2421
  async function watchCommand() {
1304
- const sourcesPath = path12.resolve(SOURCES_DIR);
1305
- if (!existsSync5(sourcesPath)) {
2422
+ const sourcesPath = path18.resolve(SOURCES_DIR);
2423
+ if (!existsSync7(sourcesPath)) {
1306
2424
  status(
1307
2425
  "!",
1308
2426
  warn("No sources/ directory found. Run `llmwiki ingest <url>` first.")
@@ -1336,7 +2454,7 @@ async function watchCommand() {
1336
2454
  const scheduleCompile = (eventPath, event) => {
1337
2455
  status(
1338
2456
  "~",
1339
- dim(`${event}: ${path12.basename(eventPath)}`)
2457
+ dim(`${event}: ${path18.basename(eventPath)}`)
1340
2458
  );
1341
2459
  if (debounceTimer) clearTimeout(debounceTimer);
1342
2460
  debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
@@ -1350,6 +2468,807 @@ async function watchCommand() {
1350
2468
  });
1351
2469
  }
1352
2470
 
2471
+ // src/linter/rules.ts
2472
+ import { readdir as readdir8, readFile as readFile9 } from "fs/promises";
2473
+ import { existsSync as existsSync8 } from "fs";
2474
+ import path19 from "path";
2475
+ var MIN_BODY_LENGTH = 50;
2476
+ var WIKILINK_PATTERN = /\[\[([^\]]+)\]\]/g;
2477
+ var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
2478
/**
 * Scan `content` line by line and collect every match of `pattern`.
 *
 * @param {string} content - Text to scan; split on "\n".
 * @param {RegExp} pattern - Regex with at least one capture group.
 * @returns {{captured: string, line: number}[]} First capture group of each
 *   match, paired with its 1-based line number, in document order.
 */
function findMatchesInContent(content, pattern) {
  // String.prototype.matchAll throws a TypeError for a regex without the `g`
  // flag, so promote non-global patterns to a global clone instead of crashing.
  const globalPattern = pattern.global ? pattern : new RegExp(pattern.source, `${pattern.flags}g`);
  const results = [];
  const lines = content.split("\n");
  for (let i = 0; i < lines.length; i++) {
    for (const match of lines[i].matchAll(globalPattern)) {
      results.push({ captured: match[1], line: i + 1 });
    }
  }
  return results;
}
2489
/**
 * Read every `.md` file directly inside `dirPath`.
 *
 * @param {string} dirPath - Directory to list.
 * @returns {Promise<{filePath: string, content: string}[]>} One record per
 *   markdown file; an empty array when the directory does not exist.
 */
async function readMarkdownFiles(dirPath) {
  if (!existsSync8(dirPath)) {
    return [];
  }
  const markdownNames = (await readdir8(dirPath)).filter((name) => name.endsWith(".md"));
  // All files are read concurrently; Promise.all keeps listing order.
  const pendingReads = markdownNames.map(async (name) => {
    const filePath = path19.join(dirPath, name);
    return { filePath, content: await readFile9(filePath, "utf-8") };
  });
  return Promise.all(pendingReads);
}
2502
/**
 * Load the markdown pages under CONCEPTS_DIR followed by those under
 * QUERIES_DIR, both resolved against `root`.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<{filePath: string, content: string}[]>} Concept pages
 *   first, then query pages.
 */
async function collectAllPages(root) {
  const pages = [];
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
    pages.push(...await readMarkdownFiles(path19.join(root, dir)));
  }
  return pages;
}
2507
/**
 * Build the set of page slugs: each file's base name without `.md`,
 * lower-cased.
 *
 * @param {{filePath: string}[]} pages - Page records.
 * @returns {Set<string>} Lower-cased slugs.
 */
function buildPageSlugSet(pages) {
  return new Set(
    pages.map(({ filePath }) => path19.basename(filePath, ".md").toLowerCase())
  );
}
2515
/**
 * Lint rule: report every `[[wikilink]]` whose slugified target does not
 * match the slug of any collected page.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One "broken-wikilink" error per dangling link,
 *   including the 1-based line number.
 */
async function checkBrokenWikilinks(root) {
  const pages = await collectAllPages(root);
  const knownSlugs = buildPageSlugSet(pages);
  return pages.flatMap((page) =>
    findMatchesInContent(page.content, WIKILINK_PATTERN)
      .filter(({ captured }) => !knownSlugs.has(slugify(captured)))
      .map(({ captured, line }) => ({
        rule: "broken-wikilink",
        severity: "error",
        file: page.filePath,
        message: `Broken wikilink [[${captured}]] \u2014 no matching page found`,
        line
      }))
  );
}
2535
/**
 * Lint rule: warn about pages whose frontmatter has `orphaned: true`
 * (strict boolean comparison).
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One "orphaned-page" warning per flagged page.
 */
async function checkOrphanedPages(root) {
  const pages = await collectAllPages(root);
  return pages
    .filter(({ content }) => parseFrontmatter(content).meta.orphaned === true)
    .map(({ filePath }) => ({
      rule: "orphaned-page",
      severity: "warning",
      file: filePath,
      message: `Page is marked as orphaned`
    }));
}
2551
/**
 * Lint rule: warn about pages whose frontmatter `summary` is falsy or a
 * whitespace-only string.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One "missing-summary" warning per such page.
 */
async function checkMissingSummaries(root) {
  const findings = [];
  for (const { content, filePath } of await collectAllPages(root)) {
    const { summary } = parseFrontmatter(content).meta;
    const isBlankString = typeof summary === "string" && summary.trim() === "";
    if (summary && !isBlankString) {
      continue;
    }
    findings.push({
      rule: "missing-summary",
      severity: "warning",
      file: filePath,
      message: `Page has no summary in frontmatter`
    });
  }
  return findings;
}
2569
/**
 * Lint rule: report pages that share a title. Titles are compared after
 * lower-casing and trimming; pages without a non-empty string title are
 * ignored.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One "duplicate-concept" error per file in each
 *   group of two or more; the message lists the other files in the group.
 */
async function checkDuplicateConcepts(root) {
  const filesByTitle = /* @__PURE__ */ new Map();
  for (const { content, filePath } of await collectAllPages(root)) {
    const { meta } = parseFrontmatter(content);
    if (typeof meta.title !== "string" || meta.title === "") {
      continue;
    }
    const key = meta.title.toLowerCase().trim();
    if (!filesByTitle.has(key)) {
      filesByTitle.set(key, []);
    }
    filesByTitle.get(key).push(filePath);
  }
  const findings = [];
  filesByTitle.forEach((files, title) => {
    if (files.length <= 1) {
      return;
    }
    files.forEach((file) => {
      const others = files.filter((f) => f !== file).join(", ");
      findings.push({
        rule: "duplicate-concept",
        severity: "error",
        file,
        message: `Duplicate title "${title}" \u2014 also in ${others}`
      });
    });
  });
  return findings;
}
2595
/**
 * Lint rule: warn about titled pages whose trimmed body is shorter than
 * MIN_BODY_LENGTH characters. Pages without a non-blank string title are
 * not reported.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One "empty-page" warning per short page.
 */
async function checkEmptyPages(root) {
  const findings = [];
  for (const { content, filePath } of await collectAllPages(root)) {
    const { meta, body } = parseFrontmatter(content);
    const titled = typeof meta.title === "string" && meta.title.trim() !== "";
    const tooShort = body.trim().length < MIN_BODY_LENGTH;
    if (!titled || !tooShort) {
      continue;
    }
    findings.push({
      rule: "empty-page",
      severity: "warning",
      file: filePath,
      message: `Page body is empty or too short (< ${MIN_BODY_LENGTH} chars)`
    });
  }
  return findings;
}
2613
/**
 * Lint rule: warn about pages whose parsed provenance `confidence` is defined
 * and not at or above LOW_CONFIDENCE_THRESHOLD.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One "low-confidence" warning per such page.
 */
async function checkLowConfidencePages(root) {
  const findings = [];
  for (const { content, filePath } of await collectAllPages(root)) {
    const { confidence } = parseProvenanceMetadata(parseFrontmatter(content).meta);
    const isFlagged = confidence !== void 0 && !(confidence >= LOW_CONFIDENCE_THRESHOLD);
    if (isFlagged) {
      findings.push({
        rule: "low-confidence",
        severity: "warning",
        file: filePath,
        message: `Page confidence ${confidence.toFixed(2)} is below ${LOW_CONFIDENCE_THRESHOLD}`
      });
    }
  }
  return findings;
}
2629
/**
 * Lint rule: warn about pages whose provenance metadata carries a non-empty
 * `contradictedBy` list; the message joins the referenced slugs.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One "contradicted-page" warning per such page.
 */
async function checkContradictedPages(root) {
  const findings = [];
  for (const { content, filePath } of await collectAllPages(root)) {
    const { contradictedBy } = parseProvenanceMetadata(parseFrontmatter(content).meta);
    if (contradictedBy && contradictedBy.length !== 0) {
      const slugList = contradictedBy.map((ref) => ref.slug).join(", ");
      findings.push({
        rule: "contradicted-page",
        severity: "warning",
        file: filePath,
        message: `Page contradicts: ${slugList}`
      });
    }
  }
  return findings;
}
2646
/**
 * Lint rule: warn when a page has more inferred paragraphs than
 * MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS. The count is the provenance
 * `inferredParagraphs` value when present (not null/undefined), otherwise the
 * number of uncited prose paragraphs counted from the body.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One "excess-inferred-paragraphs" warning per
 *   page over the limit.
 */
async function checkInferredWithoutCitations(root) {
  const findings = [];
  for (const { content, filePath } of await collectAllPages(root)) {
    const { meta, body } = parseFrontmatter(content);
    const { inferredParagraphs } = parseProvenanceMetadata(meta);
    const inferredCount = inferredParagraphs ?? countUncitedProseParagraphs(body);
    if (!(inferredCount <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS)) {
      findings.push({
        rule: "excess-inferred-paragraphs",
        severity: "warning",
        file: filePath,
        message: `Page has ${inferredCount} inferred paragraphs without citations (max ${MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS})`
      });
    }
  }
  return findings;
}
2663
// A block counts as prose only when it starts with an ASCII letter.
var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
/**
 * Count blank-line-separated blocks of `body` that start with an ASCII letter
 * and contain no match of CITATION_PATTERN.
 *
 * @param {string} body - Page body text.
 * @returns {number} Number of uncited prose paragraphs.
 */
function countUncitedProseParagraphs(body) {
  let uncited = 0;
  body.split(/\n\s*\n/).forEach((block) => {
    const text = block.trim();
    if (text.length === 0 || !PROSE_PARAGRAPH_LEAD.test(text)) {
      return;
    }
    const hasCitation = CITATION_PATTERN.test(text);
    // CITATION_PATTERN is global, so test() advances lastIndex; rewind it so
    // the next paragraph is scanned from its start.
    CITATION_PATTERN.lastIndex = 0;
    if (!hasCitation) {
      uncited += 1;
    }
  });
  return uncited;
}
2680
/**
 * Split the text captured from a citation into individual filenames:
 * comma-separated, trimmed, with empty entries dropped.
 *
 * @param {string} captured - Text captured between the citation brackets.
 * @returns {string[]} Non-empty trimmed filenames, in order.
 */
function splitCitationFilenames(captured) {
  const names = [];
  for (const piece of captured.split(",")) {
    const name = piece.trim();
    if (name.length > 0) {
      names.push(name);
    }
  }
  return names;
}
2683
/**
 * Lint rule: report every cited filename that does not exist under
 * SOURCES_DIR (resolved against `root`). A single citation may list several
 * comma-separated filenames; each is checked separately.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One "broken-citation" error per missing file,
 *   including the 1-based line number of the citation.
 */
async function checkBrokenCitations(root) {
  const pages = await collectAllPages(root);
  const sourcesDir = path19.join(root, SOURCES_DIR);
  return pages.flatMap((page) =>
    findMatchesInContent(page.content, CITATION_PATTERN).flatMap(({ captured, line }) =>
      splitCitationFilenames(captured)
        .filter((filename) => !existsSync8(path19.join(sourcesDir, filename)))
        .map((filename) => ({
          rule: "broken-citation",
          severity: "error",
          file: page.filePath,
          message: `Broken citation ^[${filename}] \u2014 source file not found`,
          line
        }))
    )
  );
}
2705
+
2706
+ // src/linter/index.ts
2707
+ var ALL_RULES = [
2708
+ checkBrokenWikilinks,
2709
+ checkOrphanedPages,
2710
+ checkMissingSummaries,
2711
+ checkDuplicateConcepts,
2712
+ checkEmptyPages,
2713
+ checkBrokenCitations,
2714
+ checkLowConfidencePages,
2715
+ checkContradictedPages,
2716
+ checkInferredWithoutCitations
2717
+ ];
2718
/**
 * Count the lint results whose `severity` strictly equals `severity`.
 *
 * @param {{severity: string}[]} results - Lint results.
 * @param {string} severity - Severity to count.
 * @returns {number} Number of matching results.
 */
function countBySeverity(results, severity) {
  let total = 0;
  for (const entry of results) {
    if (entry.severity === severity) {
      total += 1;
    }
  }
  return total;
}
2721
/**
 * Run every rule in ALL_RULES against `root` concurrently and merge the
 * findings.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<{errors: number, warnings: number, info: number, results: object[]}>}
 *   Per-severity counts plus the flattened findings in rule order.
 */
async function lint(root) {
  const perRule = await Promise.all(ALL_RULES.map((rule) => rule(root)));
  const results = perRule.flat();
  const tally = (severity) => countBySeverity(results, severity);
  return {
    errors: tally("error"),
    warnings: tally("warning"),
    info: tally("info"),
    results
  };
}
2733
+
2734
+ // src/commands/lint.ts
2735
// Formatter applied to the severity label, keyed by severity.
var SEVERITY_FORMATTERS = {
  error,
  warning: warn,
  info
};
// Icon passed to status(), keyed by severity.
var SEVERITY_ICONS = {
  error: "x",
  warning: "!",
  info: "i"
};
/**
 * Print one lint finding through status(): formatted severity label, dimmed
 * location (`file:line` when a truthy line is present, otherwise `file`), and
 * the message.
 *
 * @param {{severity: string, file: string, line?: number, message: string}} result
 */
function printResult(result) {
  const { severity, file, line, message } = result;
  let location = file;
  if (line) {
    location = `${file}:${line}`;
  }
  const label = SEVERITY_FORMATTERS[severity](severity);
  status(SEVERITY_ICONS[severity], `${label} ${dim(location)} ${message}`);
}
2751
/**
 * CLI entry point for `lint`: lints the current working directory, prints
 * each finding and a summary line, and exits with code 1 when any error-level
 * finding exists.
 */
async function lintCommand() {
  header("Linting wiki");
  const summary = await lint(process.cwd());
  summary.results.forEach((finding) => {
    printResult(finding);
  });
  console.log();
  const parts = [
    error(`${summary.errors} error(s)`),
    warn(`${summary.warnings} warning(s)`),
    info(`${summary.info} info`)
  ];
  status("*", parts.join(", "));
  if (summary.errors > 0) {
    process.exit(1);
  }
}
2768
+
2769
+ // src/commands/review-list.ts
2770
/**
 * CLI entry point for `review list`: prints one line per candidate returned
 * by listCandidates() for the current working directory, followed by a usage
 * hint, or a success notice when there are none.
 */
async function reviewListCommand() {
  header("Pending review candidates");
  const candidates = await listCandidates(process.cwd());
  if (candidates.length === 0) {
    status("\u2713", success("No pending candidates."));
    return;
  }
  candidates.forEach(({ id, slug, sources, generatedAt }) => {
    const details = dim(`${generatedAt} | sources: ${sources.join(", ")}`);
    status("?", `${info(id)} \u2192 ${slug} ${details}`);
  });
  status(
    "\u2192",
    dim(`Use \`llmwiki review show <id>\` to inspect a candidate.`)
  );
}
2787
+
2788
+ // src/commands/review-show.ts
2789
/**
 * CLI entry point for `review show <id>`: prints a candidate's metadata and
 * body. Returns without printing when loadCandidateOrFail() yields a falsy
 * value.
 *
 * @param {string} id - Candidate identifier.
 */
async function reviewShowCommand(id) {
  const candidate = await loadCandidateOrFail(process.cwd(), id);
  if (!candidate) {
    return;
  }
  header(`Candidate ${candidate.id}`);
  const fields = [
    `title: ${candidate.title}`,
    `slug: ${candidate.slug}`,
    `summary: ${candidate.summary}`,
    `sources: ${candidate.sources.join(", ")}`,
    `generated: ${candidate.generatedAt}`
  ];
  for (const field of fields) {
    status("i", dim(field));
  }
  console.log();
  console.log(candidate.body);
}
2801
+
2802
+ // src/commands/review-approve.ts
2803
+ import path20 from "path";
2804
+
2805
+ // src/commands/review-helpers.ts
2806
/**
 * Shared wrapper for review actions: checks the candidate can be loaded,
 * acquires the lock for the current working directory, runs `underLock`, and
 * always releases the lock afterwards. Sets `process.exitCode = 1` when the
 * lock cannot be acquired.
 *
 * @param {string} id - Candidate identifier.
 * @param {(root: string, id: string) => Promise<void>} underLock - Action to
 *   run while the lock is held.
 */
async function runReviewUnderLock(id, underLock) {
  const root = process.cwd();
  // Pre-check before taking the lock; returns early on a falsy result.
  if (!await loadCandidateOrFail(root, id)) {
    return;
  }
  if (await acquireLock(root)) {
    try {
      await underLock(root, id);
    } finally {
      await releaseLock(root);
    }
    return;
  }
  status("!", error("Could not acquire lock. Try again later."));
  process.exitCode = 1;
}
2822
+
2823
+ // src/commands/review-approve.ts
2824
/**
 * CLI entry point for `review approve <id>`: runs approveUnderLock inside the
 * shared lock wrapper.
 *
 * @param {string} id - Candidate identifier.
 */
async function reviewApproveCommand(id) {
  await runReviewUnderLock(id, (root, candidateId) => approveUnderLock(root, candidateId));
}
2827
/**
 * Promote a candidate into CONCEPTS_DIR; called with the lock held. Steps, in
 * order: reload the candidate, validate its body, write the page, persist
 * source states, refresh the wiki, delete the candidate. Sets
 * `process.exitCode = 1` and leaves the wiki untouched when validation fails.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate identifier.
 */
async function approveUnderLock(root, id) {
  const candidate = await loadCandidateUnderLockOrFail(root, id);
  if (!candidate) {
    return;
  }
  const isValid = validateWikiPage(candidate.body);
  if (!isValid) {
    status("!", error(`Candidate ${id} failed page validation; not approved.`));
    process.exitCode = 1;
    return;
  }
  const { slug, body } = candidate;
  const pagePath = path20.join(root, CONCEPTS_DIR, `${slug}.md`);
  await atomicWrite(pagePath, body);
  status("+", success(`Approved \u2192 ${source(pagePath)}`));
  await persistCandidateSourceStates(root, candidate);
  await refreshWikiAfterApproval(root, slug);
  await deleteCandidate(root, id);
  status("\u2713", dim(`Candidate ${id} cleared.`));
}
2843
/**
 * Write the candidate's recorded `sourceStates` via updateSourceState(), one
 * source at a time, skipping any source file that another pending candidate
 * also lists. Does nothing when the candidate has no `sourceStates`.
 *
 * @param {string} root - Project root directory.
 * @param {object} candidate - Candidate being approved.
 */
async function persistCandidateSourceStates(root, candidate) {
  const { sourceStates } = candidate;
  if (!sourceStates) {
    return;
  }
  const sharedSources = await collectOtherCandidateSources(root, candidate.id);
  const exclusive = Object.entries(sourceStates).filter(
    ([sourceFile]) => !sharedSources.has(sourceFile)
  );
  for (const [sourceFile, entry] of exclusive) {
    await updateSourceState(root, sourceFile, entry);
  }
}
2852
/**
 * Gather the source files listed by every pending candidate except the one
 * being approved.
 *
 * @param {string} root - Project root directory.
 * @param {string} approvingId - Id of the candidate to exclude.
 * @returns {Promise<Set<string>>} Source files listed by other candidates.
 */
async function collectOtherCandidateSources(root, approvingId) {
  const pending = await listCandidates(root);
  const sources = /* @__PURE__ */ new Set();
  pending
    .filter((candidate) => candidate.id !== approvingId)
    .forEach((candidate) => {
      for (const sourceFile of candidate.sources) {
        sources.add(sourceFile);
      }
    });
  return sources;
}
2861
/**
 * Refresh derived wiki artifacts after a page is approved, in order: resolve
 * links, regenerate the index, regenerate the MOC, then update embeddings
 * (embedding failures are downgraded to a warning by the helper).
 *
 * @param {string} root - Project root directory.
 * @param {string} slug - Slug of the page that was just approved.
 */
async function refreshWikiAfterApproval(root, slug) {
  // Two separate single-element arrays, as in the original call.
  const firstSlugList = [slug];
  const secondSlugList = [slug];
  await resolveLinks(root, firstSlugList, secondSlugList);
  await generateIndex(root);
  await generateMOC(root);
  await safelyUpdateEmbeddings2(root, [slug]);
}
2867
/**
 * Best-effort embeddings update: any error from updateEmbeddings() is
 * reported as a warning through status() instead of being rethrown.
 *
 * @param {string} root - Project root directory.
 * @param {string[]} slugs - Page slugs to update.
 */
async function safelyUpdateEmbeddings2(root, slugs) {
  try {
    await updateEmbeddings(root, slugs);
  } catch (err) {
    let reason;
    if (err instanceof Error) {
      reason = err.message;
    } else {
      reason = String(err);
    }
    status("!", warn(`Skipped embeddings update: ${reason}`));
  }
}
2875
+
2876
+ // src/commands/review-reject.ts
2877
/**
 * CLI entry point for `review reject <id>`: runs rejectUnderLock inside the
 * shared lock wrapper.
 *
 * @param {string} id - Candidate identifier.
 */
async function reviewRejectCommand(id) {
  await runReviewUnderLock(id, (root, candidateId) => rejectUnderLock(root, candidateId));
}
2880
/**
 * Reject a candidate; called with the lock held. Reloads the candidate,
 * archives it via archiveCandidate(), and reports the outcome.
 *
 * @param {string} root - Project root directory.
 * @param {string} id - Candidate identifier.
 */
async function rejectUnderLock(root, id) {
  const candidate = await loadCandidateUnderLockOrFail(root, id);
  if (candidate) {
    await archiveCandidate(root, id);
    const notice = warn(`Rejected candidate ${id} (${candidate.slug}) \u2014 archived, wiki unchanged.`);
    status("-", notice);
  }
}
2889
+
2890
+ // src/mcp/server.ts
2891
+ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js";
2892
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
2893
+
2894
+ // src/mcp/tools.ts
2895
+ import path21 from "path";
2896
+ import { z } from "zod";
2897
+
2898
+ // src/mcp/provider-check.ts
2899
// Environment variable holding the API key for each supported provider.
// `null` means the provider needs no key.
var PROVIDER_KEY_VARS = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  ollama: null,
  minimax: "MINIMAX_API_KEY"
};
/**
 * Verify that the configured LLM provider is known and has credentials.
 *
 * The provider name is read from LLMWIKI_PROVIDER, falling back to
 * DEFAULT_PROVIDER. "anthropic" accepts either an API key or an auth token
 * (resolved by resolveAnthropicAuthFromEnv); other providers are checked
 * against PROVIDER_KEY_VARS.
 *
 * @throws {Error} When the provider is unknown or its credentials are missing.
 */
function ensureProviderAvailable() {
  const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;
  if (provider === "anthropic") {
    const auth = resolveAnthropicAuthFromEnv();
    if (!auth.apiKey && !auth.authToken) {
      throw new Error(
        'Anthropic credentials are required for the "anthropic" provider. Set ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN.'
      );
    }
    return;
  }
  // Own-property check: a plain lookup would resolve inherited keys such as
  // "toString", "constructor" or "__proto__" to non-undefined values and let
  // a bogus provider name slip past the "Unknown provider" branch.
  const isKnownProvider = Object.prototype.hasOwnProperty.call(PROVIDER_KEY_VARS, provider);
  if (!isKnownProvider) {
    throw new Error(
      `Unknown provider "${provider}". Supported: ${Object.keys(PROVIDER_KEY_VARS).join(", ")}`
    );
  }
  const keyVar = PROVIDER_KEY_VARS[provider];
  if (keyVar && !process.env[keyVar]) {
    throw new Error(
      `${keyVar} environment variable is required for the "${provider}" provider.`
    );
  }
}
2928
+
2929
+ // src/mcp/tools.ts
2930
+ var PAGE_DIRS2 = [CONCEPTS_DIR, QUERIES_DIR];
2931
/**
 * Wrap a payload as a tool result: a pretty-printed JSON text block plus the
 * same payload under `structuredContent.result`.
 *
 * @param {*} payload - JSON-serializable value.
 * @returns {{content: {type: string, text: string}[], structuredContent: {result: *}}}
 */
function jsonResult(payload) {
  const textBlock = { type: "text", text: JSON.stringify(payload, null, 2) };
  const structuredContent = { result: payload };
  return { content: [textBlock], structuredContent };
}
2937
/**
 * Register all wiki tools on the MCP server, in a fixed order.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerWikiTools(server, root) {
  const registrars = [
    registerIngestTool,
    registerCompileTool,
    registerQueryTool,
    registerSearchTool,
    registerReadTool,
    registerLintTool,
    registerStatusTool
  ];
  for (const register of registrars) {
    register(server, root);
  }
}
2946
/**
 * Register the `ingest_source` tool. The handler switches the process working
 * directory to `root` for the duration of ingestSource() and restores the
 * previous directory afterwards, even on failure.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerIngestTool(server, root) {
  const config = {
    title: "Ingest Source",
    description: "Fetch a URL or copy a local file into sources/. Returns the saved filename, character count, and whether content was truncated to fit the size limit.",
    inputSchema: {
      source: z.string().describe("URL (http/https) or absolute path to a .md/.txt file")
    }
  };
  // NOTE(review): process.chdir is process-wide, so overlapping tool calls
  // could observe each other's cwd; confirm handlers never run concurrently.
  const handler = async ({ source: sourceArg }) => {
    const originalCwd = process.cwd();
    try {
      process.chdir(root);
      return jsonResult(await ingestSource(sourceArg));
    } finally {
      process.chdir(originalCwd);
    }
  };
  server.registerTool("ingest_source", config, handler);
}
2968
/**
 * Register the `compile_wiki` tool. The handler checks provider credentials,
 * then returns the result of compileAndReport(root).
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerCompileTool(server, root) {
  const config = {
    title: "Compile Wiki",
    description: "Run the incremental compile pipeline: extract concepts from new/changed sources, generate wiki pages, resolve interlinks, and rebuild the index. Requires an LLM provider with credentials.",
    inputSchema: {}
  };
  const handler = async () => {
    ensureProviderAvailable();
    return jsonResult(await compileAndReport(root));
  };
  server.registerTool("compile_wiki", config, handler);
}
2983
/**
 * Register the `query_wiki` tool. The handler checks provider credentials,
 * then returns the result of generateAnswer(), forwarding the optional `save`
 * flag.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerQueryTool(server, root) {
  const config = {
    title: "Query Wiki",
    description: "Ask a natural-language question. Selects relevant pages with the LLM, loads them, and returns a grounded answer with citations. Set save=true to persist the answer as a wiki page. Requires an LLM provider.",
    inputSchema: {
      question: z.string().describe("The natural-language question to answer."),
      save: z.boolean().optional().describe("Persist the answer as a wiki/queries/ page when true.")
    }
  };
  const handler = async ({ question, save }) => {
    ensureProviderAvailable();
    return jsonResult(await generateAnswer(root, question, { save }));
  };
  server.registerTool("query_wiki", config, handler);
}
3001
/**
 * Register the `search_pages` tool. The handler checks provider credentials,
 * picks slugs for the question, loads the matching pages, and returns them
 * under `pages`.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerSearchTool(server, root) {
  const config = {
    title: "Search Pages",
    description: "Select pages relevant to a question and return their full content. Uses semantic embeddings when available, falling back to LLM-based selection over the wiki index. Requires an LLM provider.",
    inputSchema: {
      question: z.string().describe("The query used to rank pages.")
    }
  };
  const handler = async ({ question }) => {
    ensureProviderAvailable();
    const slugs = await pickSearchSlugs(root, question);
    const pages = await loadPageRecords(root, slugs);
    return jsonResult({ pages });
  };
  server.registerTool("search_pages", config, handler);
}
3019
/**
 * Choose page slugs for a question. Uses findRelevantPages() first; when that
 * throws or yields nothing, falls back to selectPages() over the contents of
 * INDEX_FILE.
 *
 * @param {string} root - Project root directory.
 * @param {string} question - User question.
 * @returns {Promise<string[]>} Selected page slugs.
 */
async function pickSearchSlugs(root, question) {
  try {
    const hits = await findRelevantPages(root, question);
    if (hits.length > 0) {
      return hits.map((hit) => hit.slug);
    }
  } catch {
    // Deliberately ignored: any failure here falls through to the
    // index-based selection below.
  }
  const indexText = await safeReadFile(path21.join(root, INDEX_FILE));
  const selection = await selectPages(question, indexText);
  return selection.pages;
}
3029
/**
 * Register the `read_page` tool. The handler returns the page found by
 * readPage() and throws "Page not found" when there is none.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerReadTool(server, root) {
  const config = {
    title: "Read Page",
    description: "Read a single wiki page by slug. Searches concepts/ first, then queries/. Returns the parsed frontmatter and body. No LLM call required.",
    inputSchema: {
      slug: z.string().describe("Page slug, without .md extension.")
    }
  };
  const handler = async ({ slug }) => {
    const page = await readPage(root, slug);
    if (page) {
      return jsonResult(page);
    }
    throw new Error(`Page not found: ${slug}`);
  };
  server.registerTool("read_page", config, handler);
}
3048
/**
 * Register the `lint_wiki` tool. The handler returns the summary produced by
 * lint(root).
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerLintTool(server, root) {
  const config = {
    title: "Lint Wiki",
    description: "Run rule-based quality checks (broken wikilinks, orphans, duplicates, empty pages, broken citations). Returns structured diagnostics. No LLM call.",
    inputSchema: {}
  };
  const handler = async () => jsonResult(await lint(root));
  server.registerTool("lint_wiki", config, handler);
}
3062
/**
 * Register the `wiki_status` tool. The handler returns the summary produced
 * by collectStatus(root).
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerStatusTool(server, root) {
  const config = {
    title: "Wiki Status",
    description: "Summarize the wiki: page count, source count, last compile time, orphaned pages, and pending source changes. Read-only \u2014 never modifies the workspace.",
    inputSchema: {}
  };
  const handler = async () => {
    const summary = await collectStatus(root);
    return jsonResult(summary);
  };
  server.registerTool("wiki_status", config, handler);
}
3073
/**
 * Assemble the status summary: page counts, source count, latest `compiledAt`
 * among tracked sources, orphaned slugs, pending candidate count, and source
 * changes whose status is not "unchanged".
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object>} Status summary.
 */
async function collectStatus(root) {
  const conceptSummaries = await collectPageSummaries(path21.join(root, CONCEPTS_DIR));
  const querySummaries = await collectPageSummaries(path21.join(root, QUERIES_DIR));
  const state = await readState(root);
  const changes = await detectChanges(root, state);
  const orphanedPages = await findOrphanedSlugs(root);
  const pendingCandidates = await countCandidates(root);
  // Default sort order; the last element is taken as the most recent time.
  const sortedTimes = Object.values(state.sources).map((entry) => entry.compiledAt).sort();
  const lastCompiledAt = sortedTimes.length > 0 ? sortedTimes[sortedTimes.length - 1] : null;
  const conceptCount = conceptSummaries.length;
  const queryCount = querySummaries.length;
  const pendingChanges = changes
    .filter((change) => change.status !== "unchanged")
    .map(({ file, status: changeStatus }) => ({ file, status: changeStatus }));
  return {
    pages: { concepts: conceptCount, queries: queryCount, total: conceptCount + queryCount },
    sources: Object.keys(state.sources).length,
    lastCompiledAt,
    orphanedPages,
    pendingCandidates,
    pendingChanges
  };
}
3091
/**
 * List the slugs of pages under CONCEPTS_DIR whose frontmatter `orphaned`
 * value is truthy.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<string[]>} Slugs of orphaned concept pages.
 */
async function findOrphanedSlugs(root) {
  const orphaned = [];
  for (const { meta, slug } of await scanWikiPages(path21.join(root, CONCEPTS_DIR))) {
    if (meta.orphaned) {
      orphaned.push(slug);
    }
  }
  return orphaned;
}
3095
/**
 * Load the pages for the given slugs one at a time, skipping slugs for which
 * readPage() returns nothing.
 *
 * @param {string} root - Project root directory.
 * @param {string[]} slugs - Page slugs, in desired order.
 * @returns {Promise<object[]>} Page records that were found, in input order.
 */
async function loadPageRecords(root, slugs) {
  const found = [];
  for (const slug of slugs) {
    const page = await readPage(root, slug);
    if (!page) {
      continue;
    }
    found.push(page);
  }
  return found;
}
3103
/**
 * Read a wiki page by slug, trying each directory in PAGE_DIRS2 in order.
 * Files that are empty/unreadable or whose frontmatter marks them as orphaned
 * are skipped.
 *
 * @param {string} root - Project root directory.
 * @param {string} slug - Page slug without the `.md` extension.
 * @returns {Promise<{slug: string, title: string, summary: string, body: string} | null>}
 *   The page record, or null when no usable page exists or the slug is not a
 *   plain file name.
 */
async function readPage(root, slug) {
  // The slug arrives from MCP tool input and is joined into a filesystem
  // path; reject anything with a path separator or NUL so a value such as
  // "../../secret" cannot address files outside the page directories.
  if (typeof slug !== "string" || slug.length === 0 || /[\\/\0]/.test(slug)) {
    return null;
  }
  for (const dir of PAGE_DIRS2) {
    const content = await safeReadFile(path21.join(root, dir, `${slug}.md`));
    if (!content) continue;
    const { meta, body } = parseFrontmatter(content);
    if (meta.orphaned) continue;
    return {
      slug,
      title: typeof meta.title === "string" ? meta.title : slug,
      summary: typeof meta.summary === "string" ? meta.summary : "",
      body: body.trim()
    };
  }
  return null;
}
3118
+
3119
+ // src/mcp/resources.ts
3120
+ import path22 from "path";
3121
+ import { readdir as readdir9 } from "fs/promises";
3122
+ import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
3123
/**
 * Build a resource content entry holding `payload` as pretty-printed JSON.
 *
 * @param {{href: string}} uri - Resource URI; only `href` is read.
 * @param {*} payload - JSON-serializable value.
 * @returns {{uri: string, mimeType: string, text: string}}
 */
function jsonContent(uri, payload) {
  const text = JSON.stringify(payload, null, 2);
  return { uri: uri.href, mimeType: "application/json", text };
}
3130
/**
 * Build a resource content entry holding markdown text.
 *
 * @param {{href: string}} uri - Resource URI; only `href` is read.
 * @param {string} text - Markdown content.
 * @returns {{uri: string, mimeType: string, text: string}}
 */
function markdownContent(uri, text) {
  const { href } = uri;
  return { uri: href, mimeType: "text/markdown", text };
}
3137
/**
 * Register all wiki resources on the MCP server, in a fixed order.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerWikiResources(server, root) {
  const registrars = [
    registerIndexResource,
    registerSourcesResource,
    registerStateResource,
    registerConceptResource,
    registerQueryResource
  ];
  for (const register of registrars) {
    register(server, root);
  }
}
3144
/**
 * Register the `llmwiki://index` resource, which serves the contents of
 * INDEX_FILE (as returned by safeReadFile) as markdown.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerIndexResource(server, root) {
  const metadata = {
    title: "Wiki Index",
    description: "Full content of wiki/index.md (auto-generated table of contents).",
    mimeType: "text/markdown"
  };
  const read = async (uri) => {
    const indexText = await safeReadFile(path22.join(root, INDEX_FILE));
    return { contents: [markdownContent(uri, indexText)] };
  };
  server.registerResource("wiki-index", "llmwiki://index", metadata, read);
}
3159
/**
 * Register the `llmwiki://sources` resource, which serves the list returned
 * by listSources(root) as JSON.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerSourcesResource(server, root) {
  const metadata = {
    title: "Wiki Sources",
    description: "List of ingested source files with frontmatter metadata.",
    mimeType: "application/json"
  };
  const read = async (uri) => {
    const sources = await listSources(root);
    return { contents: [jsonContent(uri, sources)] };
  };
  server.registerResource("wiki-sources", "llmwiki://sources", metadata, read);
}
3173
/**
 * Register the `llmwiki://state` resource, which serves the value returned by
 * readState(root) as JSON.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerStateResource(server, root) {
  const metadata = {
    title: "Compilation State",
    description: "Per-source hashes, concepts, and last compile times from .llmwiki/state.json.",
    mimeType: "application/json"
  };
  const read = async (uri) => ({
    contents: [jsonContent(uri, await readState(root))]
  });
  server.registerResource("wiki-state", "llmwiki://state", metadata, read);
}
3188
/**
 * Register the `llmwiki://concept/{slug}` resource template. Listing
 * enumerates the pages under CONCEPTS_DIR; reading serves one page's
 * frontmatter and body as JSON.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerConceptResource(server, root) {
  const template = new ResourceTemplate("llmwiki://concept/{slug}", {
    list: async () => listPagesUnder(root, CONCEPTS_DIR, "concept")
  });
  const metadata = {
    title: "Wiki Concept",
    description: "A single concept page from wiki/concepts/ \u2014 frontmatter plus body.",
    mimeType: "application/json"
  };
  const read = async (uri, { slug }) => {
    const page = await loadPageWithMeta(root, CONCEPTS_DIR, String(slug));
    return { contents: [jsonContent(uri, page)] };
  };
  server.registerResource("wiki-concept", template, metadata, read);
}
3204
/**
 * Register the `llmwiki://query/{slug}` resource template. Listing enumerates
 * the pages under QUERIES_DIR; reading serves one page's frontmatter and body
 * as JSON.
 *
 * @param {object} server - MCP server instance.
 * @param {string} root - Project root directory.
 */
function registerQueryResource(server, root) {
  const template = new ResourceTemplate("llmwiki://query/{slug}", {
    list: async () => listPagesUnder(root, QUERIES_DIR, "query")
  });
  const metadata = {
    title: "Wiki Query",
    description: "A single saved query page from wiki/queries/ \u2014 frontmatter plus body.",
    mimeType: "application/json"
  };
  const read = async (uri, { slug }) => {
    const page = await loadPageWithMeta(root, QUERIES_DIR, String(slug));
    return { contents: [jsonContent(uri, page)] };
  };
  server.registerResource("wiki-query", template, metadata, read);
}
3220
/**
 * List the `.md` files under SOURCES_DIR together with their frontmatter.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} One record per file: `filename` followed by
 *   the frontmatter fields (a `filename` key in the frontmatter wins, since it
 *   is spread last). Empty when the directory cannot be listed.
 */
async function listSources(root) {
  const sourcesPath = path22.join(root, SOURCES_DIR);
  let entries;
  try {
    entries = await readdir9(sourcesPath);
  } catch {
    // An unreadable or missing sources directory is reported as "no sources".
    return [];
  }
  const markdownFiles = entries.filter((name) => name.endsWith(".md"));
  const records = [];
  for (const filename of markdownFiles) {
    const text = await safeReadFile(path22.join(sourcesPath, filename));
    records.push({ filename, ...parseFrontmatter(text).meta });
  }
  return records;
}
3236
/**
 * Load one page from `dir` and return its slug, parsed frontmatter and
 * trimmed body.
 *
 * @param {string} root - Project root directory.
 * @param {string} dir - Page directory relative to `root`.
 * @param {string} slug - Page slug without the `.md` extension.
 * @returns {Promise<{slug: string, meta: object, body: string}>}
 * @throws {Error} "Page not found" when the slug is not a plain file name or
 *   the file is empty/unreadable.
 */
async function loadPageWithMeta(root, dir, slug) {
  // The slug comes from a client-supplied resource URI and is joined into a
  // filesystem path; refuse path separators and NUL so it cannot escape `dir`
  // (e.g. "../../secret").
  const isPlainName = typeof slug === "string" && slug.length > 0 && !/[\\/\0]/.test(slug);
  if (!isPlainName) {
    throw new Error(`Page not found: ${dir}/${slug}.md`);
  }
  const filePath = path22.join(root, dir, `${slug}.md`);
  const content = await safeReadFile(filePath);
  if (!content) {
    throw new Error(`Page not found: ${dir}/${slug}.md`);
  }
  const { meta, body } = parseFrontmatter(content);
  return { slug, meta, body: body.trim() };
}
3245
/**
 * Enumerate the `.md` files under `dir` as resource descriptors.
 *
 * @param {string} root - Project root directory.
 * @param {string} dir - Page directory relative to `root`.
 * @param {string} scheme - Path segment used in the generated URIs.
 * @returns {Promise<{resources: {uri: string, name: string}[]}>} Empty list
 *   when the directory cannot be read.
 */
async function listPagesUnder(root, dir, scheme) {
  let entries;
  try {
    entries = await readdir9(path22.join(root, dir));
  } catch {
    // A missing or unreadable directory is reported as "no pages".
    return { resources: [] };
  }
  const resources = [];
  for (const entry of entries) {
    if (!entry.endsWith(".md")) {
      continue;
    }
    const slug = entry.replace(/\.md$/, "");
    resources.push({ uri: `llmwiki://${scheme}/${slug}`, name: slug });
  }
  return { resources };
}
3259
+
3260
+ // src/mcp/server.ts
3261
/**
 * Create the "llmwiki" MCP server, register the wiki tools and resources, and
 * connect it to a stdio transport.
 *
 * @param {{root: string, version: string}} options - Project root and the
 *   version string reported by the server.
 */
async function startMCPServer(options) {
  const { root, version: serverVersion } = options;
  const serverInfo = { name: "llmwiki", version: serverVersion };
  const serverOptions = {
    instructions: "llmwiki is a knowledge compiler. Use ingest_source to add raw sources, compile_wiki to run the LLM pipeline, query_wiki for grounded answers, and search_pages to retrieve relevant pages. read_page, lint_wiki, and wiki_status work without an API key."
  };
  const server = new McpServer2(serverInfo, serverOptions);
  registerWikiTools(server, root);
  registerWikiResources(server, root);
  await server.connect(new StdioServerTransport());
}
3271
+
1353
3272
  // src/cli.ts
1354
3273
  var require2 = createRequire(import.meta.url);
1355
3274
  var { version } = require2("../package.json");
@@ -1363,18 +3282,54 @@ program.command("ingest <source>").description("Ingest a URL or local file into
1363
3282
  process.exit(1);
1364
3283
  }
1365
3284
  });
1366
- program.command("compile").description("Compile sources/ into an interlinked wiki").action(async () => {
1367
- requireApiKey();
3285
+ program.command("compile").description("Compile sources/ into an interlinked wiki").option(
3286
+ "--review",
3287
+ "Write generated pages as review candidates under .llmwiki/candidates/ instead of mutating wiki/. Orphan-marking for deleted sources is deferred until the next non-review compile."
3288
+ ).action(async (options) => {
3289
+ try {
3290
+ requireProvider();
3291
+ await compileCommand({ review: options.review });
3292
+ } catch (err) {
3293
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
3294
+ process.exit(1);
3295
+ }
3296
+ });
3297
+ var reviewCommand = program.command("review").description("Inspect and act on pending compile review candidates");
3298
+ reviewCommand.command("list").description("List pending review candidates").action(async () => {
1368
3299
  try {
1369
- await compileCommand();
3300
+ await reviewListCommand();
3301
+ } catch (err) {
3302
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
3303
+ process.exit(1);
3304
+ }
3305
+ });
3306
+ reviewCommand.command("show <id>").description("Print a single candidate's metadata and body").action(async (id) => {
3307
+ try {
3308
+ await reviewShowCommand(id);
3309
+ } catch (err) {
3310
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
3311
+ process.exit(1);
3312
+ }
3313
+ });
3314
+ reviewCommand.command("approve <id>").description("Approve a candidate and promote it into wiki/concepts/").action(async (id) => {
3315
+ try {
3316
+ await reviewApproveCommand(id);
3317
+ } catch (err) {
3318
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
3319
+ process.exit(1);
3320
+ }
3321
+ });
3322
+ reviewCommand.command("reject <id>").description("Reject a candidate and archive it without touching wiki/").action(async (id) => {
3323
+ try {
3324
+ await reviewRejectCommand(id);
1370
3325
  } catch (err) {
1371
3326
  console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
1372
3327
  process.exit(1);
1373
3328
  }
1374
3329
  });
1375
3330
  // Registers the query <question> subcommand, which accepts an optional --save flag.
  // In the updated version the provider check moves inside the try block:
  // requireProvider() runs first, then queryCommand(process.cwd(), question, options)
  // is awaited. A thrown value is reported through console.error with a red Error: prefix.
  // NOTE(review): the hunk header below jumps from new line 3335 to 3337, so one
  // line of the catch body is not visible in this view.
  program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").action(async (question, options) => {
1376
- requireApiKey();
1377
3331
  try {
3332
+ requireProvider();
1378
3333
  await queryCommand(process.cwd(), question, options);
1379
3334
  } catch (err) {
1380
3335
  console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
@@ -1382,21 +3337,64 @@ program.command("query <question>").description("Ask a question against the wiki
1382
3337
  }
1383
3338
  });
1384
3339
  program.command("watch").description("Watch sources/ and auto-recompile on changes").action(async () => {
1385
- requireApiKey();
1386
3340
  try {
3341
+ requireProvider();
1387
3342
  await watchCommand();
1388
3343
  } catch (err) {
1389
3344
  console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
1390
3345
  process.exit(1);
1391
3346
  }
1392
3347
  });
1393
- program.parse();
1394
- function requireApiKey() {
1395
- if (!process.env.ANTHROPIC_API_KEY) {
3348
// "lint": delegates to lintCommand(); no provider check is performed here.
// On failure prints the error with a red "Error:" prefix and exits with status 1.
program
  .command("lint")
  .description("Run rule-based quality checks against the wiki")
  .action(async () => {
    try {
      await lintCommand();
    } catch (failure) {
      // Error instances contribute their message; any other thrown value is
      // stringified by the template literal.
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
3356
// "serve": starts the MCP server via startMCPServer(), passing the --root option
// (which defaults to the current working directory) and the CLI version.
// On failure prints the error with a red "Error:" prefix and exits with status 1.
program
  .command("serve")
  .description("Start an MCP server exposing wiki tools and resources over stdio")
  .option("--root <dir>", "Project root directory", process.cwd())
  .action(async (options) => {
    try {
      const serverConfig = { root: options.root, version };
      await startMCPServer(serverConfig);
    } catch (failure) {
      // Error instances contribute their message; any other thrown value is
      // stringified by the template literal.
      const detail = failure instanceof Error ? failure.message : failure;
      console.error(`\x1B[31mError:\x1B[0m ${detail}`);
      process.exit(1);
    }
  });
3364
/**
 * Environment variable that must be set for each supported provider.
 * A `null` entry marks a provider that needs no API key.
 */
var PROVIDER_KEY_VARS2 = {
  anthropic: "ANTHROPIC_API_KEY",
  openai: "OPENAI_API_KEY",
  ollama: null,
  minimax: "MINIMAX_API_KEY"
};

/**
 * Ensure credentials for the selected LLM provider are available.
 *
 * The provider is read from `LLMWIKI_PROVIDER`, falling back to
 * `DEFAULT_PROVIDER`. For "anthropic" either an API key or an auth token (as
 * resolved by `resolveAnthropicAuthFromEnv()`) is accepted. For every other
 * provider the variable named in `PROVIDER_KEY_VARS2` must be set, unless the
 * entry is `null`.
 *
 * On any failure an error is written to stderr and the process exits with
 * status 1; otherwise the function returns `undefined`.
 */
function requireProvider() {
  const provider = process.env.LLMWIKI_PROVIDER ?? DEFAULT_PROVIDER;

  if (provider === "anthropic") {
    const auth = resolveAnthropicAuthFromEnv();
    if (!auth.apiKey && !auth.authToken) {
      console.error(
        `\x1B[31mError:\x1B[0m Anthropic credentials are required for the "anthropic" provider.\nSet one of: export ANTHROPIC_API_KEY=<your-key> OR export ANTHROPIC_AUTH_TOKEN=<your-token>`
      );
      process.exit(1);
    }
    return;
  }

  // Test own-property membership instead of comparing the lookup result with
  // undefined: names such as "constructor", "toString" or "__proto__" resolve
  // to inherited Object.prototype members on a plain object and would otherwise
  // slip past the unknown-provider check.
  const isKnownProvider = Object.prototype.hasOwnProperty.call(PROVIDER_KEY_VARS2, provider);
  if (!isKnownProvider) {
    console.error(
      `\x1B[31mError:\x1B[0m Unknown provider "${provider}".\nSupported: ${Object.keys(PROVIDER_KEY_VARS2).join(", ")}`
    );
    process.exit(1);
  }

  // A null entry means the provider runs without an API key.
  const keyVar = isKnownProvider ? PROVIDER_KEY_VARS2[provider] : null;
  if (keyVar && !process.env[keyVar]) {
    console.error(
      `\x1B[31mError:\x1B[0m ${keyVar} environment variable is required for the "${provider}" provider.\nSet it with: export ${keyVar}=<your-key>`
    );
    process.exit(1);
  }
}
3399
// Kicks off argument parsing. In the updated file this call sits after the
// PROVIDER_KEY_VARS2 assignment and the requireProvider definition, whereas the
// removed call at old line 1393 came before the old requireApiKey function.
// NOTE(review): program is created outside this view; presumably a Commander
// instance, in which case parseAsync is the documented entry point for async
// action handlers. Confirm before changing.
+ program.parse();
1402
3400
  //# sourceMappingURL=cli.js.map