llm-wiki-compiler 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2,12 +2,12 @@
2
2
 
3
3
  // src/cli.ts
4
4
  import "dotenv/config";
5
- import { createRequire } from "module";
5
+ import { createRequire as createRequire2 } from "module";
6
6
  import { Command } from "commander";
7
7
 
8
8
  // src/commands/ingest.ts
9
- import path7 from "path";
10
- import { mkdir as mkdir2, readFile as readFile6, writeFile as writeFile2 } from "fs/promises";
9
+ import path8 from "path";
10
+ import { readFile as readFile7 } from "fs/promises";
11
11
 
12
12
  // src/utils/markdown.ts
13
13
  import { writeFile, rename, readFile, mkdir } from "fs/promises";
@@ -22,7 +22,7 @@ var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
22
22
  "ambiguous"
23
23
  ]);
24
24
  function slugify(title) {
25
- return title.toLowerCase().replace(/['']/g, "").replace(/[^\w\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
25
+ return title.toLowerCase().replace(/['']/g, "").replace(/[^\p{L}\p{N}\s-]/gu, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
26
26
  }
27
27
  function buildFrontmatter(fields) {
28
28
  const dumped = yaml.dump(fields, { lineWidth: -1, quotingType: '"' }).trimEnd();
@@ -103,16 +103,11 @@ function parseContradictedBy(raw) {
103
103
  const refs = raw.map(coerceContradictionEntry).filter((ref) => ref !== null);
104
104
  return refs.length > 0 ? refs : void 0;
105
105
  }
106
- function parseInferredParagraphs(raw) {
107
- if (typeof raw !== "number" || !Number.isInteger(raw) || raw < 0) return void 0;
108
- return raw;
109
- }
110
106
  function parseProvenanceMetadata(meta) {
111
107
  return {
112
108
  confidence: parseConfidence(meta.confidence),
113
109
  provenanceState: parseProvenanceState(meta.provenanceState),
114
- contradictedBy: parseContradictedBy(meta.contradictedBy),
115
- inferredParagraphs: parseInferredParagraphs(meta.inferredParagraphs)
110
+ contradictedBy: parseContradictedBy(meta.contradictedBy)
116
111
  };
117
112
  }
118
113
  function validateWikiPage(content) {
@@ -123,9 +118,16 @@ function validateWikiPage(content) {
123
118
  return true;
124
119
  }
125
120
 
121
+ // src/utils/source-writer.ts
122
+ import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
123
+ import path2 from "path";
124
+ import { createHash } from "crypto";
125
+
126
126
  // src/utils/constants.ts
127
127
  var MAX_SOURCE_CHARS = 1e5;
128
128
  var MIN_SOURCE_CHARS = 50;
129
+ var DEFAULT_PROMPT_BUDGET_CHARS = 2e5;
130
+ var PROMPT_BUDGET_ENV_VAR = "LLMWIKI_PROMPT_BUDGET_CHARS";
129
131
  var QUERY_PAGE_LIMIT = 5;
130
132
  var COMPILE_CONCURRENCY = 5;
131
133
  var RETRY_COUNT = 3;
@@ -169,6 +171,42 @@ var EMBEDDING_MODELS = {
169
171
  ollama: "nomic-embed-text"
170
172
  };
171
173
 
174
+ // src/utils/source-writer.ts
// Number of leading hex characters of the digest kept as a filename suffix.
var COLLISION_HASH_LEN = 8;

/**
 * Derive a short, deterministic identifier for a source string.
 *
 * @param {string} source2 - Source identifier to hash.
 * @returns {string} The first COLLISION_HASH_LEN hex characters of the SHA-256 digest.
 */
function shortHashOfSource(source2) {
  const hasher = createHash("sha256");
  hasher.update(source2);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, COLLISION_HASH_LEN);
}
/**
 * Choose the filename under SOURCES_DIR for a source with the given slug.
 *
 * Returns `<slug>.md` when that file does not exist yet, or when the existing
 * file's frontmatter `source` is a string equal to `source2` (same source being
 * re-ingested). Otherwise returns `<slug>-<short hash of source2>.md` so the
 * existing file is not overwritten by a different source.
 *
 * @param {string} slug - Slug derived from the title.
 * @param {string} source2 - Identifier of the source being saved.
 * @returns {Promise<string>} Filename (not a full path).
 * @throws Rethrows any read error whose code is not "ENOENT".
 */
async function resolveCollisionFreeFilename(slug, source2) {
  const plainName = `${slug}.md`;
  const plainPath = path2.join(SOURCES_DIR, plainName);
  let currentText;
  try {
    currentText = await readFile2(plainPath, "utf-8");
  } catch (err) {
    // A missing file means the plain name is free; anything else is a real failure.
    if (err.code !== "ENOENT") throw err;
    return plainName;
  }
  const frontmatter = parseFrontmatter(currentText).meta;
  const isSameSource = typeof frontmatter.source === "string" && frontmatter.source === source2;
  if (isSameSource) return plainName;
  const suffix = shortHashOfSource(source2);
  return `${slug}-${suffix}.md`;
}
/**
 * Write an ingested document into SOURCES_DIR and return where it landed.
 *
 * @param {string} title - Title used to derive the filename slug.
 * @param {string} document - Full file contents to write.
 * @param {string} source2 - Source identifier, used to pick a collision-free filename.
 * @returns {Promise<string>} Path of the written file.
 * @throws {Error} When the title slugifies to an empty string.
 */
async function saveSource(title, document, source2) {
  const slug = slugify(title);
  if (!slug) {
    const reason = `Could not derive a filename from title "${title}". The title contains no letter or number characters. Rename the source file to one with at least one letter or digit.`;
    throw new Error(reason);
  }
  // The directory must exist before the collision check reads from it or the write happens.
  await mkdir2(SOURCES_DIR, { recursive: true });
  const chosenName = await resolveCollisionFreeFilename(slug, source2);
  const destPath = path2.join(SOURCES_DIR, chosenName);
  await writeFile2(destPath, document, "utf-8");
  return destPath;
}
209
+
172
210
  // src/utils/output.ts
173
211
  var RESET = "\x1B[0m";
174
212
  var BOLD = "\x1B[1m";
@@ -244,13 +282,13 @@ async function ingestWeb(url) {
244
282
  }
245
283
 
246
284
  // src/ingest/file.ts
247
- import { readFile as readFile2 } from "fs/promises";
248
- import path3 from "path";
285
+ import { readFile as readFile3 } from "fs/promises";
286
+ import path4 from "path";
249
287
 
250
288
  // src/ingest/shared.ts
251
- import path2 from "path";
289
+ import path3 from "path";
252
290
  function titleFromFilename(filePath) {
253
- const basename = path2.basename(filePath, path2.extname(filePath));
291
+ const basename = path3.basename(filePath, path3.extname(filePath));
254
292
  return basename.replace(/[-_]+/g, " ").trim();
255
293
  }
256
294
 
@@ -262,20 +300,20 @@ ${text}
262
300
  \`\`\``;
263
301
  }
264
302
  async function ingestFile(filePath) {
265
- const ext = path3.extname(filePath).toLowerCase();
303
+ const ext = path4.extname(filePath).toLowerCase();
266
304
  if (!SUPPORTED_EXTENSIONS.has(ext)) {
267
305
  throw new Error(
268
306
  `Unsupported file type "${ext}". Only .md and .txt files are supported.`
269
307
  );
270
308
  }
271
- const raw = await readFile2(filePath, "utf-8");
309
+ const raw = await readFile3(filePath, "utf-8");
272
310
  const title = titleFromFilename(filePath);
273
311
  const content = ext === ".md" ? raw : wrapPlainText(raw);
274
312
  return { title, content };
275
313
  }
276
314
 
277
315
  // src/ingest/pdf.ts
278
- import { readFile as readFile3 } from "fs/promises";
316
+ import { readFile as readFile4 } from "fs/promises";
279
317
  function resolveTitle(filePath, info2) {
280
318
  if (info2 && typeof info2 === "object") {
281
319
  const titleField = info2["Title"];
@@ -287,7 +325,7 @@ function resolveTitle(filePath, info2) {
287
325
  }
288
326
  async function ingestPdf(filePath) {
289
327
  const { PDFParse } = await import("pdf-parse");
290
- const buffer = await readFile3(filePath);
328
+ const buffer = await readFile4(filePath);
291
329
  const parser = new PDFParse({ data: new Uint8Array(buffer) });
292
330
  try {
293
331
  const textResult = await parser.getText();
@@ -301,8 +339,8 @@ async function ingestPdf(filePath) {
301
339
  }
302
340
 
303
341
  // src/ingest/image.ts
304
- import { readFile as readFile4 } from "fs/promises";
305
- import path5 from "path";
342
+ import { readFile as readFile5 } from "fs/promises";
343
+ import path6 from "path";
306
344
  import Anthropic2 from "@anthropic-ai/sdk";
307
345
 
308
346
  // src/providers/anthropic.ts
@@ -419,7 +457,7 @@ var AnthropicProvider = class {
419
457
  // src/utils/claude-settings.ts
420
458
  import { readFileSync } from "fs";
421
459
  import { homedir } from "os";
422
- import path4 from "path";
460
+ import path5 from "path";
423
461
  var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
424
462
  function isRecord(value) {
425
463
  return typeof value === "object" && value !== null;
@@ -430,7 +468,7 @@ function normalize(value) {
430
468
  return trimmed.length > 0 ? trimmed : void 0;
431
469
  }
432
470
  function resolveClaudeSettingsPath(env) {
433
- return env[CLAUDE_SETTINGS_PATH_ENV] ?? path4.join(homedir(), ".claude", "settings.json");
471
+ return env[CLAUDE_SETTINGS_PATH_ENV] ?? path5.join(homedir(), ".claude", "settings.json");
434
472
  }
435
473
  function readClaudeSettingsFile(settingsPath) {
436
474
  try {
@@ -563,9 +601,9 @@ async function ingestImage(filePath) {
563
601
  `Image ingest requires the Anthropic provider (vision). Current provider: "${providerName}". Set LLMWIKI_PROVIDER=anthropic and ANTHROPIC_API_KEY to use image ingest.`
564
602
  );
565
603
  }
566
- const ext = path5.extname(filePath).toLowerCase();
604
+ const ext = path6.extname(filePath).toLowerCase();
567
605
  const mimeType = mimeTypeForExtension(ext);
568
- const imageBuffer = await readFile4(filePath);
606
+ const imageBuffer = await readFile5(filePath);
569
607
  const imageData = imageBuffer.toString("base64");
570
608
  const client = buildClient();
571
609
  const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
@@ -575,8 +613,8 @@ async function ingestImage(filePath) {
575
613
  }
576
614
 
577
615
  // src/ingest/transcript.ts
578
- import { readFile as readFile5 } from "fs/promises";
579
- import path6 from "path";
616
+ import { readFile as readFile6 } from "fs/promises";
617
+ import path7 from "path";
580
618
  import { YoutubeTranscript } from "youtube-transcript";
581
619
  var YOUTUBE_URL_PATTERN = /^https?:\/\/(www\.)?(youtube\.com\/watch|youtu\.be\/)/;
582
620
  var SRT_SEQUENCE_PATTERN = /^\d+$/;
@@ -661,8 +699,8 @@ async function ingestTranscript(source2) {
661
699
  if (isYoutubeUrl(source2)) {
662
700
  return fetchYoutubeTranscript(source2);
663
701
  }
664
- const ext = path6.extname(source2).toLowerCase();
665
- const raw = await readFile5(source2, "utf-8");
702
+ const ext = path7.extname(source2).toLowerCase();
703
+ const raw = await readFile6(source2, "utf-8");
666
704
  if (ext === ".vtt") return parseVtt(raw, source2);
667
705
  if (ext === ".srt") return parseSrt(raw, source2);
668
706
  if (ext === ".txt") return parsePlainTranscript(raw, source2);
@@ -701,7 +739,7 @@ function hasSpeakerDialoguePattern(sample) {
701
739
  return hasEnoughSpeakers && hasRepeatedSpeaker;
702
740
  }
703
741
  async function looksLikeTxtTranscript(filePath) {
704
- const raw = await readFile6(filePath, "utf-8");
742
+ const raw = await readFile7(filePath, "utf-8");
705
743
  const sample = raw.slice(0, TXT_SNIFF_BYTES);
706
744
  if (hasSpeakerDialoguePattern(sample)) return true;
707
745
  const timestampMatches = sample.match(new RegExp(TIMESTAMP_PATTERN2.source, "gm"));
@@ -741,7 +779,7 @@ function enforceMinContent(content) {
741
779
  }
742
780
  async function detectSourceType(source2) {
743
781
  if (!isUrl(source2)) {
744
- const ext = path7.extname(source2).toLowerCase();
782
+ const ext = path8.extname(source2).toLowerCase();
745
783
  if (ext === ".pdf") return "pdf";
746
784
  if (IMAGE_EXTENSIONS.has(ext)) return "image";
747
785
  if (TRANSCRIPT_EXTENSIONS.has(ext)) return "transcript";
@@ -787,13 +825,6 @@ async function fetchContent(source2, sourceType) {
787
825
  return ingestFile(source2);
788
826
  }
789
827
  }
790
- async function saveSource(title, document) {
791
- const filename = `${slugify(title)}.md`;
792
- const destPath = path7.join(SOURCES_DIR, filename);
793
- await mkdir2(SOURCES_DIR, { recursive: true });
794
- await writeFile2(destPath, document, "utf-8");
795
- return destPath;
796
- }
797
828
  async function ingestSource(source2) {
798
829
  const sourceType = await detectSourceType(source2);
799
830
  status("*", info(`Ingesting [${sourceType}]: ${source2}`));
@@ -801,9 +832,9 @@ async function ingestSource(source2) {
801
832
  const result = enforceCharLimit(content);
802
833
  enforceMinContent(result.content);
803
834
  const document = buildDocument(title, source2, result, sourceType);
804
- const savedPath = await saveSource(title, document);
835
+ const savedPath = await saveSource(title, document, source2);
805
836
  return {
806
- filename: path7.basename(savedPath),
837
+ filename: path8.basename(savedPath),
807
838
  charCount: result.content.length,
808
839
  truncated: result.truncated,
809
840
  source: source2,
@@ -812,7 +843,7 @@ async function ingestSource(source2) {
812
843
  }
813
844
  async function ingest(source2) {
814
845
  const result = await ingestSource(source2);
815
- const savedPath = path7.join(SOURCES_DIR, result.filename);
846
+ const savedPath = path8.join(SOURCES_DIR, result.filename);
816
847
  status(
817
848
  "+",
818
849
  success(`Saved ${bold(result.filename)} \u2192 ${source(savedPath)}`)
@@ -820,27 +851,390 @@ async function ingest(source2) {
820
851
  status("\u2192", dim("Next: llmwiki compile"));
821
852
  }
822
853
 
854
+ // src/commands/ingest-session.ts
855
+ import path12 from "path";
856
+ import { readdir, stat } from "fs/promises";
857
+
858
+ // src/adapters/claude.ts
859
+ import { readFile as readFile8 } from "fs/promises";
860
+ import path9 from "path";
861
+
862
+ // src/adapters/utils.ts
// Maximum title length, measured in UTF-16 code units, before truncation.
var MAX_TITLE_CHARS = 80;

/**
 * Trim a title and cap it at MAX_TITLE_CHARS, appending an ellipsis when cut.
 *
 * The cut is made on UTF-16 code units; if it lands between the two halves of a
 * surrogate pair, the dangling high surrogate is dropped so the result never
 * ends in a lone surrogate.
 *
 * @param {string} text - Raw title text.
 * @returns {string} The trimmed title, truncated with "…" when too long.
 */
function truncateTitle(text) {
  const trimmed = text.trim();
  if (trimmed.length <= MAX_TITLE_CHARS) return trimmed;
  let head = trimmed.slice(0, MAX_TITLE_CHARS);
  const lastUnit = head.charCodeAt(head.length - 1);
  // 0xD800–0xDBFF is the high-surrogate range: its partner was sliced off.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return `${head.trimEnd()}\u2026`;
}
/**
 * Pick a session title from the best available candidate.
 *
 * Preference order: a non-blank `rawTitle`, then the non-blank first line of
 * `firstUserContent`, then `defaultTitle`. Candidates that are not strings
 * (possible when they come straight from parsed JSON) are ignored instead of
 * raising a TypeError.
 *
 * @param {unknown} rawTitle - Title recorded in the export, if any.
 * @param {unknown} firstUserContent - Text of the first user turn, if any.
 * @param {string} defaultTitle - Fallback returned when no candidate is usable.
 * @returns {string} The chosen title (candidates are passed through truncateTitle).
 */
function resolveSessionTitle(rawTitle, firstUserContent, defaultTitle) {
  if (typeof rawTitle === "string" && rawTitle.trim().length > 0) {
    return truncateTitle(rawTitle);
  }
  if (typeof firstUserContent === "string") {
    const firstLine = firstUserContent.split("\n", 1)[0];
    if (firstLine.trim().length > 0) return truncateTitle(firstLine);
  }
  return defaultTitle;
}
/**
 * Parse a JSON string, reporting failures against the file it came from.
 *
 * @param {string} raw - JSON text.
 * @param {string} filePath - Path named in the error message.
 * @returns {*} The parsed value.
 * @throws {Error} "Invalid JSON in session file: <filePath>"; the original
 *   parse error is attached as `cause` so the position details are not lost.
 */
function parseJsonOrThrow(raw, filePath) {
  try {
    return JSON.parse(raw);
  } catch (err) {
    throw new Error(`Invalid JSON in session file: ${filePath}`, { cause: err });
  }
}
883
+
884
+ // src/adapters/claude.ts
885
+ var CLAUDE_EXTENSION = ".jsonl";
886
+ var CLAUDE_TYPE_MARKERS = /* @__PURE__ */ new Set(["user", "assistant", "system", "tool_use", "tool_result"]);
/**
 * Flatten a message `content` value into plain text.
 *
 * A string is returned unchanged. An array is treated as a list of blocks:
 * blocks with `type === "text"` and a string `text` are joined with newlines.
 * Any other value (missing content, null, a non-array object) and any null
 * entry inside the array yields no text instead of raising a TypeError.
 *
 * @param {unknown} content - The `content` field of a message.
 * @returns {string} Extracted text; empty when nothing usable is present.
 */
function extractText(content) {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";
  return content
    .filter((block) => block != null && block.type === "text" && typeof block.text === "string")
    .map((block) => block.text)
    .join("\n");
}
/**
 * Title a Claude session from its first user turn that has non-blank content.
 *
 * @param {Array<{role: string, content: string}>} turns - Parsed turns.
 * @returns {string} Result of resolveSessionTitle with "Claude Session" as fallback.
 */
function titleFromFirstUserMessage(turns) {
  const isNonBlankUserTurn = (turn) => turn.role === "user" && turn.content.trim().length > 0;
  const opening = turns.find(isNonBlankUserTurn);
  const openingText = opening == null ? undefined : opening.content;
  return resolveSessionTitle(void 0, openingText, "Claude Session");
}
/**
 * Parse one line as JSON.
 *
 * @param {string} line - A single line of text.
 * @returns {*} The parsed value, or null when the line is not valid JSON.
 */
function parseLine(line) {
  let parsed = null;
  try {
    parsed = JSON.parse(line);
  } catch {
    // Invalid JSON: keep the null default so the caller can report the line.
  }
  return parsed;
}
/**
 * Convert one parsed session event into a conversation turn.
 *
 * @param {object} event - Parsed event; `message` and `timestamp` are read.
 * @returns {{role: string, content: string, timestamp: *}|null} A turn, or null
 *   when the event has no message/role, the role is neither "user" nor
 *   "assistant", or the extracted text is blank.
 */
function eventToTurn(event) {
  const message = event.message;
  if (!message || !message.role) return null;
  const { role } = message;
  const isConversational = role === "user" || role === "assistant";
  if (!isConversational) return null;
  const content = extractText(message.content);
  const isBlank = content.trim().length === 0;
  return isBlank ? null : { role, content, timestamp: event.timestamp };
}
/**
 * Session adapter for Claude JSONL files (one JSON event per line).
 *
 * detect(filePath): true when the extension equals CLAUDE_EXTENSION and the
 * first line parses as a JSON object whose string `type` is in
 * CLAUDE_TYPE_MARKERS. Read failures are treated as an empty file.
 *
 * parse(filePath): reads every non-blank line, throws on the first line that
 * fails to parse, collects event timestamps and user/assistant turns, and
 * returns the session record.
 */
var claudeAdapter = {
  name: "claude",
  async detect(filePath) {
    const extension = path9.extname(filePath).toLowerCase();
    if (extension !== CLAUDE_EXTENSION) return false;
    const raw = await readFile8(filePath, "utf-8").catch(() => "");
    const [head] = raw.split("\n");
    const firstLine = head.trim();
    if (!firstLine.startsWith("{")) return false;
    try {
      const { type } = JSON.parse(firstLine);
      return typeof type === "string" && CLAUDE_TYPE_MARKERS.has(type);
    } catch {
      return false;
    }
  },
  async parse(filePath) {
    const raw = await readFile8(filePath, "utf-8");
    const lines = raw.split("\n").filter((l) => l.trim().length > 0);
    if (lines.length === 0) {
      throw new Error(`Claude session file is empty: ${filePath}`);
    }
    const turns = [];
    const timestamps = [];
    lines.forEach((line, index) => {
      const event = parseLine(line);
      if (event === null) {
        // index counts non-blank lines only, because blank lines were filtered above.
        throw new Error(
          `Malformed JSON on line ${index + 1} of Claude session: ${filePath}`
        );
      }
      if (event.timestamp) timestamps.push(event.timestamp);
      const turn = eventToTurn(event);
      if (turn) turns.push(turn);
    });
    const lastIndex = timestamps.length - 1;
    return {
      title: titleFromFirstUserMessage(turns),
      adapter: "claude",
      startedAt: timestamps[0],
      endedAt: timestamps[lastIndex],
      participantIdentity: "Claude Code",
      turns
    };
  }
};
954
+
955
+ // src/adapters/codex.ts
956
+ import { readFile as readFile9 } from "fs/promises";
957
+ import path10 from "path";
958
+ var CODEX_EXTENSION = ".json";
/**
 * Convert a Unix timestamp in seconds to an ISO-8601 string.
 *
 * @param {number} ts - Seconds since the Unix epoch.
 * @returns {string} `Date#toISOString()` of that instant.
 */
function unixToIso(ts) {
  const millis = ts * 1e3;
  const moment = new Date(millis);
  return moment.toISOString();
}
/**
 * Collect user/assistant turns from a conversation `mapping` object.
 *
 * Each value of `mapping` is inspected for a `message`; messages whose author
 * role is "user" or "assistant" and whose string parts join to non-blank text
 * become turns. Null nodes, a non-array `parts`, and non-string entries inside
 * `parts` are skipped rather than crashing or being stringified as
 * "[object Object]". Turns are then sorted by timestamp; a pair where either
 * side has no timestamp compares as equal and keeps its relative order.
 *
 * @param {object} mapping - Node map taken from the export.
 * @returns {Array<{role: string, content: string, timestamp: (string|undefined)}>}
 */
function extractTurns(mapping) {
  const turns = [];
  for (const node of Object.values(mapping)) {
    const msg = node?.message;
    if (!msg) continue;
    const role = msg.author?.role;
    if (role !== "user" && role !== "assistant") continue;
    const rawParts = msg.content?.parts;
    const textParts = Array.isArray(rawParts)
      ? rawParts.filter((part) => typeof part === "string")
      : [];
    const content = textParts.join("\n").trim();
    if (content.length === 0) continue;
    turns.push({
      role,
      content,
      timestamp: msg.create_time != null ? unixToIso(msg.create_time) : void 0
    });
  }
  turns.sort((a, b) => {
    if (!a.timestamp || !b.timestamp) return 0;
    return a.timestamp.localeCompare(b.timestamp);
  });
  return turns;
}
/**
 * Check whether a parsed JSON value has the shape this adapter expects: a
 * non-empty array whose first element is an object with an object-typed
 * `mapping` property.
 *
 * A null first element returns false instead of raising a TypeError. As in the
 * `typeof` check, a null `mapping` is still accepted.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {boolean}
 */
function isCodexExport(value) {
  if (!Array.isArray(value) || value.length === 0) return false;
  const first = value[0];
  return first != null && typeof first.mapping === "object";
}
/**
 * Session adapter for JSON exports that hold an array of conversations.
 *
 * detect(filePath): true when the extension equals CODEX_EXTENSION, the text
 * starts with "[" after leading whitespace, and the parsed value passes
 * isCodexExport. Read and parse failures yield false.
 *
 * parse(filePath): parses the file and builds the session record from the
 * first conversation in the array (`parsed[0]`).
 */
var codexAdapter = {
  name: "codex",
  async detect(filePath) {
    const extension = path10.extname(filePath).toLowerCase();
    if (extension !== CODEX_EXTENSION) return false;
    const raw = await readFile9(filePath, "utf-8").catch(() => "");
    if (!raw.trimStart().startsWith("[")) return false;
    try {
      const data = JSON.parse(raw);
      return isCodexExport(data);
    } catch {
      return false;
    }
  },
  async parse(filePath) {
    const raw = await readFile9(filePath, "utf-8");
    const parsed = parseJsonOrThrow(raw, filePath);
    if (!isCodexExport(parsed)) {
      throw new Error(
        `Codex session file does not contain a conversation array: ${filePath}`
      );
    }
    const [conv] = parsed;
    const turns = extractTurns(conv.mapping ?? {});
    const firstUser = turns.find((t) => t.role === "user");
    const title = resolveSessionTitle(conv.title, firstUser?.content, "Codex Session");
    // Conversation-level times are Unix seconds; absent values stay undefined.
    const startedAt = conv.create_time != null ? unixToIso(conv.create_time) : void 0;
    const endedAt = conv.update_time != null ? unixToIso(conv.update_time) : void 0;
    return {
      title,
      adapter: "codex",
      startedAt,
      endedAt,
      participantIdentity: "OpenAI Codex",
      turns
    };
  }
};
1019
+
1020
+ // src/adapters/cursor.ts
1021
+ import { readFile as readFile10 } from "fs/promises";
1022
+ import path11 from "path";
1023
+ var CURSOR_EXTENSION = ".json";
/**
 * Check for the "tabs" export shape: a non-null object with an array `tabs`.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {boolean}
 */
function isTabsExport(value) {
  if (typeof value !== "object" || value === null) return false;
  if (!("tabs" in value)) return false;
  return Array.isArray(value.tabs);
}
/**
 * Check for the "flat" export shape: a non-null object with an array `messages`.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {boolean}
 */
function isFlatExport(value) {
  if (typeof value !== "object" || value === null) return false;
  if (!("messages" in value)) return false;
  return Array.isArray(value.messages);
}
/**
 * Pull the message list and title out of either export shape.
 *
 * For a tabs export only the first tab is read; a missing tab or missing
 * `messages` yields an empty list. Otherwise `messages` and `title` are taken
 * directly from `data`.
 *
 * @param {object} data - Parsed export.
 * @returns {{messages: Array, title: *}}
 */
function extractMessagesAndTitle(data) {
  if (!isTabsExport(data)) {
    const { messages, title } = data;
    return { messages, title };
  }
  const firstTab = data.tabs[0];
  if (firstTab == null) {
    return { messages: [], title: undefined };
  }
  return { messages: firstTab.messages ?? [], title: firstTab.title };
}
/**
 * Convert raw export messages into conversation turns.
 *
 * Keeps messages whose role is "user" or "assistant" and whose `content` is a
 * string that is non-blank after trimming. Null entries and non-string
 * `content` values are skipped instead of raising a TypeError.
 *
 * @param {Array} messages - Raw message objects.
 * @returns {Array<{role: string, content: string, timestamp: *}>}
 */
function toTurns(messages) {
  const turns = [];
  for (const msg of messages) {
    if (msg == null) continue;
    const { role } = msg;
    if (role !== "user" && role !== "assistant") continue;
    if (typeof msg.content !== "string") continue;
    const content = msg.content.trim();
    if (content.length === 0) continue;
    turns.push({ role, content, timestamp: msg.timestamp });
  }
  return turns;
}
/**
 * Session adapter for JSON object exports in either the tabs or flat shape.
 *
 * detect(filePath): true when the extension equals CURSOR_EXTENSION, the text
 * starts with "{" after leading whitespace, and the parsed value passes
 * isTabsExport or isFlatExport. Read and parse failures yield false.
 *
 * parse(filePath): parses the file, converts its messages to turns, and uses
 * the first and last non-null turn timestamps as the session bounds.
 */
var cursorAdapter = {
  name: "cursor",
  async detect(filePath) {
    const extension = path11.extname(filePath).toLowerCase();
    if (extension !== CURSOR_EXTENSION) return false;
    const raw = await readFile10(filePath, "utf-8").catch(() => "");
    if (!raw.trimStart().startsWith("{")) return false;
    try {
      const data = JSON.parse(raw);
      return isTabsExport(data) || isFlatExport(data);
    } catch {
      return false;
    }
  },
  async parse(filePath) {
    const raw = await readFile10(filePath, "utf-8");
    const parsed = parseJsonOrThrow(raw, filePath);
    const isKnownShape = isTabsExport(parsed) || isFlatExport(parsed);
    if (!isKnownShape) {
      throw new Error(
        `Cursor session file does not match a known Cursor export schema: ${filePath}`
      );
    }
    const extracted = extractMessagesAndTitle(parsed);
    const turns = toTurns(extracted.messages);
    const firstUser = turns.find((t) => t.role === "user");
    const timestamps = [];
    for (const turn of turns) {
      if (turn.timestamp != null) timestamps.push(turn.timestamp);
    }
    return {
      title: resolveSessionTitle(extracted.title, firstUser?.content, "Cursor Session"),
      adapter: "cursor",
      startedAt: timestamps[0],
      endedAt: timestamps[timestamps.length - 1],
      participantIdentity: "Cursor AI",
      turns
    };
  }
};
1083
+
1084
+ // src/adapters/registry.ts
1085
+ var ADAPTERS = [claudeAdapter, codexAdapter, cursorAdapter];
/**
 * Find the first adapter in ADAPTERS whose detect() accepts the file.
 * Adapters are tried one at a time, in list order.
 *
 * @param {string} filePath - File to probe.
 * @returns {Promise<object|null>} The matching adapter, or null when none match.
 */
async function detectAdapter(filePath) {
  for (let i = 0; i < ADAPTERS.length; i += 1) {
    const candidate = ADAPTERS[i];
    const isRecognised = await candidate.detect(filePath);
    if (isRecognised) return candidate;
  }
  return null;
}
/**
 * Detect the right adapter for a session file, parse it, and verify that the
 * result contains at least one usable turn.
 *
 * @param {string} filePath - Session file to parse.
 * @returns {Promise<object>} The parsed session.
 * @throws {Error} When no adapter recognises the file, or (via
 *   assertSessionHasUsableTurns) when the session has no usable turns.
 */
async function parseSessionFile(filePath) {
  const adapter = await detectAdapter(filePath);
  if (!adapter) {
    const supported = ADAPTERS.map((a) => a.name).join(", ");
    throw new Error(
      `No session adapter recognised the file: ${filePath}\nSupported formats: ${supported}`
    );
  }
  const session = await adapter.parse(filePath);
  assertSessionHasUsableTurns(session, filePath);
  return session;
}
/**
 * Throw unless the session has at least one user or assistant turn whose
 * content is non-blank.
 *
 * @param {{adapter: string, turns: Array}} session - Parsed session.
 * @param {string} filePath - Path named in the error message.
 * @throws {Error} When no usable turn exists.
 */
function assertSessionHasUsableTurns(session, filePath) {
  const isUsable = (turn) => {
    const isSpeaker = turn.role === "user" || turn.role === "assistant";
    return isSpeaker && turn.content.trim().length > 0;
  };
  if (session.turns.some(isUsable)) return;
  throw new Error(
    `${session.adapter} session has no usable turns: ${filePath}\nThe file matches the ${session.adapter} export shape, but no user or assistant message with content was found. Re-export the session or delete the file if it is empty.`
  );
}
/**
 * Render a session's turns as Markdown.
 *
 * Each turn becomes a "### <label>" heading (with " _(<timestamp>)_" appended
 * when the turn has a timestamp), a blank line, the turn content, and a blank
 * line. User turns are labelled "User"; all others use
 * `session.participantIdentity`, or "Assistant" when that is null/undefined.
 * Trailing whitespace of the whole document is removed.
 *
 * @param {{turns: Array, participantIdentity?: string}} session
 * @returns {string}
 */
function formatSessionAsMarkdown(session) {
  const sections = [];
  for (const turn of session.turns) {
    const label = turn.role === "user" ? "User" : session.participantIdentity ?? "Assistant";
    let heading = `### ${label}`;
    if (turn.timestamp) {
      heading = `### ${label} _(${turn.timestamp})_`;
    }
    // The trailing empty element produces the blank line that separates turns.
    sections.push([heading, "", turn.content, ""].join("\n"));
  }
  return sections.join("\n").trimEnd();
}
1127
+
1128
+ // src/commands/ingest-session.ts
/**
 * Build the frontmatter for a saved session document.
 *
 * Always includes title, source, adapter, and the current time as ingestedAt;
 * sessionStartedAt, sessionEndedAt, and participant are added only when the
 * corresponding session field is truthy. Keys are added in that order.
 *
 * @param {object} session - Parsed session.
 * @param {string} sourcePath - Path the session was read from.
 * @returns {*} Whatever buildFrontmatter returns for the assembled fields.
 */
function buildSessionFrontmatter(session, sourcePath) {
  const { startedAt, endedAt, participantIdentity } = session;
  const meta = {
    title: session.title,
    source: sourcePath,
    adapter: session.adapter,
    ingestedAt: (/* @__PURE__ */ new Date()).toISOString(),
    ...(startedAt ? { sessionStartedAt: startedAt } : {}),
    ...(endedAt ? { sessionEndedAt: endedAt } : {}),
    ...(participantIdentity ? { participant: participantIdentity } : {})
  };
  return buildFrontmatter(meta);
}
/**
 * Assemble the Markdown document for a session and save it as a source.
 *
 * The document is the frontmatter, a blank line, the rendered turns, and a
 * final newline.
 *
 * @param {object} session - Parsed session.
 * @param {string} sourcePath - Path the session was read from.
 * @returns {Promise<string>} Path returned by saveSource.
 */
async function saveSessionSource(session, sourcePath) {
  const header = buildSessionFrontmatter(session, sourcePath);
  const transcript = formatSessionAsMarkdown(session);
  const document = `${header}\n\n${transcript}\n`;
  return saveSource(session.title, document, sourcePath);
}
/**
 * Ingest one session file: parse it, save it as a source, and report progress.
 *
 * @param {string} filePath - Session file to ingest.
 * @returns {Promise<{filename: string, adapter: string, title: string, source: string}>}
 *   Summary of what was saved.
 */
async function ingestSessionFile(filePath) {
  status("*", info(`Ingesting session: ${filePath}`));
  const session = await parseSessionFile(filePath);
  const savedPath = await saveSessionSource(session, filePath);
  const savedName = path12.basename(savedPath);
  const { adapter, title } = session;
  const summaryLine = `Saved ${bold(savedName)} [${adapter}] \u2192 ${source(savedPath)}`;
  status("+", success(summaryLine));
  return {
    filename: path12.basename(savedPath),
    adapter,
    title,
    source: filePath
  };
}
/**
 * List the regular files directly inside a directory (no recursion).
 *
 * Entries are checked with stat(), so symlinks are followed. An entry that no
 * longer resolves (a dangling symlink, or a file removed between readdir and
 * stat) is skipped instead of aborting the whole listing; any other stat
 * failure is rethrown. Results keep readdir order.
 *
 * @param {string} dirPath - Directory to list.
 * @returns {Promise<string[]>} Joined paths of the entries that are files.
 */
async function listDirectoryFiles(dirPath) {
  const entries = await readdir(dirPath);
  const checked = await Promise.all(
    entries.map(async (entry) => {
      const full = path12.join(dirPath, entry);
      try {
        const info2 = await stat(full);
        return info2.isFile() ? full : null;
      } catch (err) {
        if (err?.code === "ENOENT") return null;
        throw err;
      }
    })
  );
  return checked.filter((candidate) => candidate !== null);
}
/**
 * Ingest every file in a directory as a session, one at a time.
 *
 * A file that fails to ingest is reported with a warning and counted as
 * skipped; the run continues with the next file.
 *
 * @param {string} dirPath - Directory to scan.
 * @returns {Promise<void>}
 * @throws {Error} When the directory contains no files, or when no file could
 *   be imported.
 */
async function ingestDirectory(dirPath) {
  const files = await listDirectoryFiles(dirPath);
  if (files.length === 0) {
    throw new Error(`No files found in directory: ${dirPath}`);
  }
  status("*", info(`Scanning ${files.length} file(s) in: ${dirPath}`));
  const tally = { imported: 0, skipped: 0 };
  for (const file of files) {
    try {
      await ingestSessionFile(file);
      tally.imported += 1;
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      status("!", warn(`Skipped ${path12.basename(file)}: ${message}`));
      tally.skipped += 1;
    }
  }
  const { imported, skipped } = tally;
  if (imported === 0) {
    throw new Error(
      `No sessions imported from ${dirPath} (${skipped} file(s) skipped). Check that at least one file is in a supported session format.`
    );
  }
  const summary = `Imported ${imported} session(s), skipped ${skipped}.`;
  status("\u2192", dim(summary));
}
/**
 * Entry point for session ingest: handles either a directory of session files
 * or a single session file, then prints the next-step hint.
 *
 * @param {string} targetPath - File or directory to ingest.
 * @returns {Promise<void>}
 * @throws {Error} "Path not found: <targetPath>" when the path does not exist
 *   (the stat error is attached as `cause`). Other stat failures, such as
 *   permission errors, are rethrown unchanged rather than being reported as a
 *   missing path.
 */
async function ingestSession(targetPath) {
  let info2;
  try {
    info2 = await stat(targetPath);
  } catch (err) {
    const code = err?.code;
    if (code === "ENOENT" || code === "ENOTDIR") {
      throw new Error(`Path not found: ${targetPath}`, { cause: err });
    }
    throw err;
  }
  if (info2.isDirectory()) {
    await ingestDirectory(targetPath);
  } else {
    await ingestSessionFile(targetPath);
  }
  status("\u2192", dim("Next: llmwiki compile"));
}
1216
+
823
1217
  // src/commands/compile.ts
824
1218
  import { existsSync as existsSync7 } from "fs";
825
1219
 
826
1220
  // src/compiler/index.ts
827
- import { readFile as readFile14 } from "fs/promises";
828
- import path21 from "path";
1221
+ import { readFile as readFile18 } from "fs/promises";
1222
+ import path26 from "path";
829
1223
 
830
1224
  // src/utils/state.ts
831
- import { readFile as readFile7, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
1225
+ import { readFile as readFile11, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
832
1226
  import { existsSync } from "fs";
833
- import path8 from "path";
1227
+ import path13 from "path";
834
1228
  function emptyState() {
835
1229
  return { version: 1, indexHash: "", sources: {} };
836
1230
  }
837
1231
  async function readState(root) {
838
- const filePath = path8.join(root, STATE_FILE);
1232
+ const filePath = path13.join(root, STATE_FILE);
839
1233
  if (!existsSync(filePath)) {
840
1234
  return emptyState();
841
1235
  }
842
1236
  try {
843
- const raw = await readFile7(filePath, "utf-8");
1237
+ const raw = await readFile11(filePath, "utf-8");
844
1238
  return JSON.parse(raw);
845
1239
  } catch {
846
1240
  const bakPath = filePath + ".bak";
@@ -850,9 +1244,9 @@ async function readState(root) {
850
1244
  }
851
1245
  }
852
1246
  async function writeState(root, state) {
853
- const dir = path8.join(root, LLMWIKI_DIR);
1247
+ const dir = path13.join(root, LLMWIKI_DIR);
854
1248
  await mkdir3(dir, { recursive: true });
855
- const filePath = path8.join(root, STATE_FILE);
1249
+ const filePath = path13.join(root, STATE_FILE);
856
1250
  const tmpPath = filePath + ".tmp";
857
1251
  await writeFile3(tmpPath, JSON.stringify(state, null, 2), "utf-8");
858
1252
  await rename2(tmpPath, filePath);
@@ -869,18 +1263,18 @@ async function removeSourceState(root, sourceFile) {
869
1263
  }
870
1264
 
871
1265
  // src/compiler/source-state.ts
872
- import path10 from "path";
1266
+ import path15 from "path";
873
1267
 
874
1268
  // src/compiler/hasher.ts
875
- import { createHash } from "crypto";
876
- import { readFile as readFile8, readdir } from "fs/promises";
877
- import path9 from "path";
1269
+ import { createHash as createHash2 } from "crypto";
1270
+ import { readFile as readFile12, readdir as readdir2 } from "fs/promises";
1271
+ import path14 from "path";
878
1272
  async function hashFile(filePath) {
879
- const content = await readFile8(filePath, "utf-8");
880
- return createHash("sha256").update(content).digest("hex");
1273
+ const content = await readFile12(filePath, "utf-8");
1274
+ return createHash2("sha256").update(content).digest("hex");
881
1275
  }
882
1276
  async function detectChanges(root, prevState) {
883
- const sourcesPath = path9.join(root, SOURCES_DIR);
1277
+ const sourcesPath = path14.join(root, SOURCES_DIR);
884
1278
  const currentFiles = await listSourceFiles(sourcesPath);
885
1279
  const changes = [];
886
1280
  for (const file of currentFiles) {
@@ -893,14 +1287,14 @@ async function detectChanges(root, prevState) {
893
1287
  }
894
1288
  async function listSourceFiles(sourcesPath) {
895
1289
  try {
896
- const entries = await readdir(sourcesPath);
1290
+ const entries = await readdir2(sourcesPath);
897
1291
  return entries.filter((f) => f.endsWith(".md"));
898
1292
  } catch {
899
1293
  return [];
900
1294
  }
901
1295
  }
902
1296
  async function classifyFile(root, file, prevState) {
903
- const filePath = path9.join(root, SOURCES_DIR, file);
1297
+ const filePath = path14.join(root, SOURCES_DIR, file);
904
1298
  const hash = await hashFile(filePath);
905
1299
  const prev = prevState.sources[file];
906
1300
  if (!prev) return "new";
@@ -923,7 +1317,7 @@ async function buildExtractionSourceStates(root, extractions) {
923
1317
  return snapshot;
924
1318
  }
925
1319
  async function buildEntry(root, result, compiledAt) {
926
- const filePath = path10.join(root, SOURCES_DIR, result.sourceFile);
1320
+ const filePath = path15.join(root, SOURCES_DIR, result.sourceFile);
927
1321
  const hash = await hashFile(filePath);
928
1322
  return {
929
1323
  hash,
@@ -1161,8 +1555,8 @@ async function callClaude(options) {
1161
1555
  }
1162
1556
 
1163
1557
  // src/utils/lock.ts
1164
- import { open, readFile as readFile9, unlink, mkdir as mkdir4 } from "fs/promises";
1165
- import path11 from "path";
1558
+ import { open, readFile as readFile13, unlink, mkdir as mkdir4 } from "fs/promises";
1559
+ import path16 from "path";
1166
1560
  var RECLAIM_SUFFIX = ".reclaim";
1167
1561
  var MAX_ACQUIRE_ATTEMPTS = 2;
1168
1562
  function isProcessAlive(pid) {
@@ -1174,8 +1568,8 @@ function isProcessAlive(pid) {
1174
1568
  }
1175
1569
  }
1176
1570
  async function acquireLock(root) {
1177
- const lockPath = path11.join(root, LOCK_FILE);
1178
- await mkdir4(path11.join(root, LLMWIKI_DIR), { recursive: true });
1571
+ const lockPath = path16.join(root, LOCK_FILE);
1572
+ await mkdir4(path16.join(root, LLMWIKI_DIR), { recursive: true });
1179
1573
  for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
1180
1574
  const created = await tryCreateLock(lockPath);
1181
1575
  if (created) return true;
@@ -1238,7 +1632,7 @@ async function tryCreateLock(lockPath) {
1238
1632
  }
1239
1633
  async function isLockStale(lockPath) {
1240
1634
  try {
1241
- const content = await readFile9(lockPath, "utf-8");
1635
+ const content = await readFile13(lockPath, "utf-8");
1242
1636
  const pid = parseInt(content.trim(), 10);
1243
1637
  if (isNaN(pid)) return true;
1244
1638
  return !isProcessAlive(pid);
@@ -1247,14 +1641,32 @@ async function isLockStale(lockPath) {
1247
1641
  }
1248
1642
  }
1249
1643
  async function releaseLock(root) {
1250
- const lockPath = path11.join(root, LOCK_FILE);
1644
+ const lockPath = path16.join(root, LOCK_FILE);
1251
1645
  try {
1252
1646
  await unlink(lockPath);
1253
1647
  } catch {
1254
1648
  }
1255
1649
  }
1256
1650
 
1651
+ // src/utils/output-language.ts
1652
+ var LANG_ENV_VAR = "LLMWIKI_OUTPUT_LANG";
1653
+ function getOutputLanguage() {
1654
+ const raw = process.env[LANG_ENV_VAR];
1655
+ if (!raw) return null;
1656
+ const trimmed = raw.trim();
1657
+ return trimmed.length > 0 ? trimmed : null;
1658
+ }
1659
+ function languageDirective() {
1660
+ const lang = getOutputLanguage();
1661
+ if (!lang) return "";
1662
+ return `Write the output in ${lang}.`;
1663
+ }
1664
+
1257
1665
  // src/compiler/prompts.ts
1666
+ function withLangLine(...lines) {
1667
+ const lang = languageDirective();
1668
+ return lang ? [...lines, lang] : lines;
1669
+ }
1258
1670
  var PROVENANCE_STATE_VALUES = [
1259
1671
  "extracted",
1260
1672
  "merged",
@@ -1309,10 +1721,6 @@ var CONCEPT_EXTRACTION_TOOL = {
1309
1721
  required: ["slug"]
1310
1722
  },
1311
1723
  description: "Slugs of other concepts whose evidence contradicts this one."
1312
- },
1313
- inferred_paragraphs: {
1314
- type: "integer",
1315
- description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
1316
1724
  }
1317
1725
  },
1318
1726
  required: ["concept", "summary", "is_new"]
@@ -1329,11 +1737,13 @@ Here is the existing wiki index \u2014 avoid duplicating concepts already covere
1329
1737
 
1330
1738
  ${existingIndex}` : "\n\nNo existing wiki pages yet.";
1331
1739
  return [
1332
- "You are a knowledge extraction engine. Analyze the following source document",
1333
- "and identify 3-8 distinct, meaningful concepts worth documenting as wiki pages.",
1334
- "Each concept should be a standalone topic that someone might look up.",
1335
- "Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
1336
- "Use the extract_concepts tool to return your findings.",
1740
+ ...withLangLine(
1741
+ "You are a knowledge extraction engine. Analyze the following source document",
1742
+ "and identify 3-8 distinct, meaningful concepts worth documenting as wiki pages.",
1743
+ "Each concept should be a standalone topic that someone might look up.",
1744
+ "Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
1745
+ "Use the extract_concepts tool to return your findings."
1746
+ ),
1337
1747
  "",
1338
1748
  "For every concept, emit provenance metadata so downstream tools can reason",
1339
1749
  "about reliability:",
@@ -1343,8 +1753,6 @@ ${existingIndex}` : "\n\nNo existing wiki pages yet.";
1343
1753
  " or 'ambiguous' if the source is contradictory or unclear.",
1344
1754
  " - contradicted_by: slugs of other concepts (in this batch or the index)",
1345
1755
  " whose evidence conflicts with this one.",
1346
- " - inferred_paragraphs: estimated number of paragraphs in the resulting",
1347
- " page that will be inferred rather than directly citable.",
1348
1756
  indexSection,
1349
1757
  "\n\n--- SOURCE DOCUMENT ---\n\n",
1350
1758
  sourceContent
@@ -1362,11 +1770,13 @@ Related wiki pages for cross-referencing:
1362
1770
 
1363
1771
  ${relatedPages}` : "";
1364
1772
  return [
1365
- `You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
1366
- "Draw facts only from the provided source material.",
1367
- "Include a ## Sources section at the end listing the source document.",
1368
- "Suggest [[wikilinks]] to related concepts where appropriate.",
1369
- "Write in a neutral, informative tone. Be concise but thorough.",
1773
+ ...withLangLine(
1774
+ `You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
1775
+ "Draw facts only from the provided source material.",
1776
+ "Include a ## Sources section at the end listing the source document.",
1777
+ "Suggest [[wikilinks]] to related concepts where appropriate.",
1778
+ "Write in a neutral, informative tone. Be concise but thorough."
1779
+ ),
1370
1780
  "",
1371
1781
  "Source attribution: at the end of each prose paragraph, append a citation",
1372
1782
  "marker showing which source file(s) the paragraph drew from.",
@@ -1383,7 +1793,7 @@ ${relatedPages}` : "";
1383
1793
  "",
1384
1794
  "If a paragraph is your inference rather than a direct extraction, leave it",
1385
1795
  "uncited \u2014 downstream lint rules will count uncited paragraphs as 'inferred'",
1386
- "to compute the page's provenance metadata.",
1796
+ "so lint can surface excess-inferred-paragraphs warnings on review.",
1387
1797
  existingSection,
1388
1798
  relatedSection,
1389
1799
  "\n\n--- SOURCE MATERIAL ---\n\n",
@@ -1415,20 +1825,21 @@ function mapRawConcept(c) {
1415
1825
  tags: Array.isArray(c.tags) ? c.tags : void 0,
1416
1826
  confidence: typeof c.confidence === "number" ? c.confidence : void 0,
1417
1827
  provenanceState: provenance,
1418
- contradictedBy: coerceContradictedBy(c.contradicted_by),
1419
- inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
1828
+ contradictedBy: coerceContradictedBy(c.contradicted_by)
1420
1829
  };
1421
1830
  }
1422
1831
  function buildSeedPagePrompt(seed, rule, relatedPagesContent) {
1423
1832
  const minLinks = rule.minWikilinks;
1424
1833
  const linkExpectation = minLinks > 0 ? `Include at least ${minLinks} [[wikilinks]] to related pages.` : "Use [[wikilinks]] when referencing other pages.";
1425
1834
  return [
1426
- `You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
1427
- `Page-kind guidance: ${rule.description}`,
1428
- `Summary line for context: ${seed.summary}`,
1429
- "Draw facts only from the related wiki pages provided below.",
1430
- linkExpectation,
1431
- "Write in a neutral, informative tone. Be concise but thorough.",
1835
+ ...withLangLine(
1836
+ `You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
1837
+ `Page-kind guidance: ${rule.description}`,
1838
+ `Summary line for context: ${seed.summary}`,
1839
+ "Draw facts only from the related wiki pages provided below.",
1840
+ linkExpectation,
1841
+ "Write in a neutral, informative tone. Be concise but thorough."
1842
+ ),
1432
1843
  "\n\n--- RELATED PAGES ---\n\n",
1433
1844
  relatedPagesContent
1434
1845
  ].join("\n");
@@ -1490,8 +1901,8 @@ function buildDefaultSchema() {
1490
1901
 
1491
1902
  // src/schema/loader.ts
1492
1903
  import { existsSync as existsSync2 } from "fs";
1493
- import { readFile as readFile10 } from "fs/promises";
1494
- import path12 from "path";
1904
+ import { readFile as readFile14 } from "fs/promises";
1905
+ import path17 from "path";
1495
1906
  import yaml2 from "js-yaml";
1496
1907
  var SCHEMA_CANDIDATE_PATHS = [
1497
1908
  ".llmwiki/schema.json",
@@ -1502,7 +1913,7 @@ var SCHEMA_CANDIDATE_PATHS = [
1502
1913
  ];
1503
1914
  function findSchemaPath(root) {
1504
1915
  for (const candidate of SCHEMA_CANDIDATE_PATHS) {
1505
- const absolute = path12.join(root, candidate);
1916
+ const absolute = path17.join(root, candidate);
1506
1917
  if (existsSync2(absolute)) return absolute;
1507
1918
  }
1508
1919
  return null;
@@ -1555,12 +1966,12 @@ async function loadSchema(root) {
1555
1966
  const defaults = buildDefaultSchema();
1556
1967
  const schemaPath = findSchemaPath(root);
1557
1968
  if (!schemaPath) return defaults;
1558
- const raw = await readFile10(schemaPath, "utf-8");
1969
+ const raw = await readFile14(schemaPath, "utf-8");
1559
1970
  const parsed = parseSchemaFile(schemaPath, raw);
1560
1971
  return applyOverrides(defaults, parsed, schemaPath);
1561
1972
  }
1562
1973
  function defaultSchemaInitPath(root) {
1563
- return path12.join(root, SCHEMA_CANDIDATE_PATHS[0]);
1974
+ return path17.join(root, SCHEMA_CANDIDATE_PATHS[0]);
1564
1975
  }
1565
1976
 
1566
1977
  // src/schema/helpers.ts
@@ -1732,7 +2143,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
1732
2143
  }
1733
2144
 
1734
2145
  // src/compiler/orphan.ts
1735
- import path13 from "path";
2146
+ import path18 from "path";
1736
2147
  async function markOrphaned(root, sourceFile, state) {
1737
2148
  const sourceEntry = state.sources[sourceFile];
1738
2149
  if (!sourceEntry) return;
@@ -1758,7 +2169,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
1758
2169
  }
1759
2170
  }
1760
2171
  async function orphanPage(root, slug, reason) {
1761
- const pagePath = path13.join(root, CONCEPTS_DIR, `${slug}.md`);
2172
+ const pagePath = path18.join(root, CONCEPTS_DIR, `${slug}.md`);
1762
2173
  const content = await safeReadFile(pagePath);
1763
2174
  if (!content) return;
1764
2175
  const { meta } = parseFrontmatter(content);
@@ -1769,18 +2180,18 @@ async function orphanPage(root, slug, reason) {
1769
2180
  }
1770
2181
 
1771
2182
  // src/compiler/resolver.ts
1772
- import { readdir as readdir2, readFile as readFile11 } from "fs/promises";
1773
- import path14 from "path";
2183
+ import { readdir as readdir3, readFile as readFile15 } from "fs/promises";
2184
+ import path19 from "path";
1774
2185
  import { existsSync as existsSync3 } from "fs";
1775
2186
  async function buildTitleIndex(root) {
1776
- const conceptsDir = path14.join(root, CONCEPTS_DIR);
2187
+ const conceptsDir = path19.join(root, CONCEPTS_DIR);
1777
2188
  if (!existsSync3(conceptsDir)) return [];
1778
- const files = await readdir2(conceptsDir);
2189
+ const files = await readdir3(conceptsDir);
1779
2190
  const pages = [];
1780
2191
  for (const file of files) {
1781
2192
  if (!file.endsWith(".md")) continue;
1782
- const filePath = path14.join(conceptsDir, file);
1783
- const content = await readFile11(filePath, "utf-8");
2193
+ const filePath = path19.join(conceptsDir, file);
2194
+ const content = await readFile15(filePath, "utf-8");
1784
2195
  const { meta } = parseFrontmatter(content);
1785
2196
  if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
1786
2197
  pages.push({
@@ -1866,7 +2277,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
1866
2277
  let count = 0;
1867
2278
  for (const page of titleIndex) {
1868
2279
  if (newSlugs.includes(page.slug)) continue;
1869
- const content = await readFile11(page.filePath, "utf-8");
2280
+ const content = await readFile15(page.filePath, "utf-8");
1870
2281
  const { body } = parseFrontmatter(content);
1871
2282
  const linked = addWikilinks(body, newTitles, page.title);
1872
2283
  if (linked !== body) {
@@ -1878,7 +2289,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
1878
2289
  return count;
1879
2290
  }
1880
2291
  async function linkPage(page, titleIndex) {
1881
- const content = await readFile11(page.filePath, "utf-8");
2292
+ const content = await readFile15(page.filePath, "utf-8");
1882
2293
  const { body } = parseFrontmatter(content);
1883
2294
  const linked = addWikilinks(body, titleIndex, page.title);
1884
2295
  if (linked === body) return false;
@@ -1888,18 +2299,18 @@ async function linkPage(page, titleIndex) {
1888
2299
  }
1889
2300
 
1890
2301
  // src/compiler/indexgen.ts
1891
- import { readdir as readdir3 } from "fs/promises";
1892
- import path15 from "path";
2302
+ import { readdir as readdir4 } from "fs/promises";
2303
+ import path20 from "path";
1893
2304
  async function generateIndex(root) {
1894
2305
  status("*", info("Generating index..."));
1895
- const conceptsPath = path15.join(root, CONCEPTS_DIR);
1896
- const queriesPath = path15.join(root, QUERIES_DIR);
2306
+ const conceptsPath = path20.join(root, CONCEPTS_DIR);
2307
+ const queriesPath = path20.join(root, QUERIES_DIR);
1897
2308
  const concepts = await collectPageSummaries(conceptsPath);
1898
2309
  const queries = await collectPageSummaries(queriesPath);
1899
2310
  concepts.sort((a, b) => a.title.localeCompare(b.title));
1900
2311
  queries.sort((a, b) => a.title.localeCompare(b.title));
1901
2312
  const indexContent = buildIndexContent(concepts, queries);
1902
- const indexPath = path15.join(root, INDEX_FILE);
2313
+ const indexPath = path20.join(root, INDEX_FILE);
1903
2314
  await atomicWrite(indexPath, indexContent);
1904
2315
  const total = concepts.length + queries.length;
1905
2316
  status("+", success(`Index updated with ${total} pages.`));
@@ -1907,13 +2318,13 @@ async function generateIndex(root) {
1907
2318
  async function scanWikiPages(dirPath) {
1908
2319
  let files;
1909
2320
  try {
1910
- files = await readdir3(dirPath);
2321
+ files = await readdir4(dirPath);
1911
2322
  } catch {
1912
2323
  return [];
1913
2324
  }
1914
2325
  const scanned = [];
1915
2326
  for (const file of files.filter((f) => f.endsWith(".md"))) {
1916
- const content = await safeReadFile(path15.join(dirPath, file));
2327
+ const content = await safeReadFile(path20.join(dirPath, file));
1917
2328
  const { meta } = parseFrontmatter(content);
1918
2329
  scanned.push({ slug: file.replace(/\.md$/, ""), meta });
1919
2330
  }
@@ -1948,9 +2359,45 @@ function buildIndexContent(concepts, queries) {
1948
2359
  return lines.join("\n");
1949
2360
  }
1950
2361
 
2362
+ // src/compiler/prompt-budget.ts
2363
+ var TRUNCATION_MARKER = "\n\n[\u2026truncated for prompt budget \u2014 see #39\u2026]";
2364
+ function resolvePromptBudgetChars() {
2365
+ const raw = process.env[PROMPT_BUDGET_ENV_VAR];
2366
+ if (!raw) return DEFAULT_PROMPT_BUDGET_CHARS;
2367
+ const parsed = Number.parseInt(raw, 10);
2368
+ if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_PROMPT_BUDGET_CHARS;
2369
+ return parsed;
2370
+ }
2371
+ function buildBudgetedCombinedContent(concept, slices) {
2372
+ const budget = resolvePromptBudgetChars();
2373
+ const totalRaw = slices.reduce((sum, s) => sum + s.content.length, 0);
2374
+ if (totalRaw <= budget) {
2375
+ return formatSlices(slices);
2376
+ }
2377
+ const perSource = Math.max(1, Math.floor(budget / slices.length));
2378
+ warnTruncation(concept, totalRaw, slices.length, perSource, budget);
2379
+ const trimmed = slices.map(
2380
+ (s) => s.content.length > perSource ? { ...s, content: s.content.slice(0, perSource) + TRUNCATION_MARKER } : s
2381
+ );
2382
+ return formatSlices(trimmed);
2383
+ }
2384
+ function formatSlices(slices) {
2385
+ return slices.map((s) => `--- SOURCE: ${s.file} ---
2386
+
2387
+ ${s.content}`).join("\n\n");
2388
+ }
2389
+ function warnTruncation(concept, totalRaw, sourceCount, perSource, budget) {
2390
+ status(
2391
+ "!",
2392
+ warn(
2393
+ `Combined source content for "${concept}" (${totalRaw.toLocaleString()} chars across ${sourceCount} sources) exceeds the ${budget.toLocaleString()}-char prompt budget; truncating each source to ~${perSource.toLocaleString()} chars. Raise via ${PROMPT_BUDGET_ENV_VAR} when running against larger-context models.`
2394
+ )
2395
+ );
2396
+ }
2397
+
1951
2398
  // src/compiler/obsidian.ts
1952
- import { readdir as readdir4 } from "fs/promises";
1953
- import path16 from "path";
2399
+ import { readdir as readdir5 } from "fs/promises";
2400
+ import path21 from "path";
1954
2401
  var ABBREVIATION_MIN_WORDS = 3;
1955
2402
  var SWAP_CONJUNCTIONS = [" and ", " or "];
1956
2403
  function addObsidianMeta(frontmatter, conceptTitle, tags) {
@@ -1992,23 +2439,23 @@ function generateAbbreviation(title) {
1992
2439
  return abbreviation;
1993
2440
  }
1994
2441
  async function generateMOC(root) {
1995
- const conceptsPath = path16.join(root, CONCEPTS_DIR);
2442
+ const conceptsPath = path21.join(root, CONCEPTS_DIR);
1996
2443
  const pages = await loadConceptPages(conceptsPath);
1997
2444
  const tagGroups = groupPagesByTag(pages);
1998
2445
  const content = buildMOCContent(tagGroups);
1999
- await atomicWrite(path16.join(root, MOC_FILE), content);
2446
+ await atomicWrite(path21.join(root, MOC_FILE), content);
2000
2447
  }
2001
2448
  async function loadConceptPages(conceptsPath) {
2002
2449
  let files;
2003
2450
  try {
2004
- files = await readdir4(conceptsPath);
2451
+ files = await readdir5(conceptsPath);
2005
2452
  } catch {
2006
2453
  return [];
2007
2454
  }
2008
2455
  const pages = [];
2009
2456
  for (const file of files) {
2010
2457
  if (!file.endsWith(".md")) continue;
2011
- const content = await safeReadFile(path16.join(conceptsPath, file));
2458
+ const content = await safeReadFile(path21.join(conceptsPath, file));
2012
2459
  if (!content) continue;
2013
2460
  const { meta } = parseFrontmatter(content);
2014
2461
  if (meta.orphaned) continue;
@@ -2059,14 +2506,14 @@ function buildMOCContent(tagGroups) {
2059
2506
  }
2060
2507
 
2061
2508
  // src/utils/embeddings.ts
2062
- import { readFile as readFile12, readdir as readdir5 } from "fs/promises";
2509
+ import { readFile as readFile16, readdir as readdir6 } from "fs/promises";
2063
2510
  import { existsSync as existsSync4 } from "fs";
2064
- import path17 from "path";
2511
+ import path22 from "path";
2065
2512
 
2066
2513
  // src/utils/retrieval.ts
2067
- import { createHash as createHash2 } from "crypto";
2514
+ import { createHash as createHash3 } from "crypto";
2068
2515
  function hashChunkText(text) {
2069
- return createHash2("sha256").update(text, "utf8").digest("hex").slice(0, 16);
2516
+ return createHash3("sha256").update(text, "utf8").digest("hex").slice(0, 16);
2070
2517
  }
2071
2518
  function splitIntoChunks(body) {
2072
2519
  const paragraphs = extractParagraphs(body);
@@ -2226,13 +2673,13 @@ function findTopKChunks(queryVec, chunks, k) {
2226
2673
  return scored.slice(0, k);
2227
2674
  }
2228
2675
  async function readEmbeddingStore(root) {
2229
- const filePath = path17.join(root, EMBEDDINGS_FILE);
2676
+ const filePath = path22.join(root, EMBEDDINGS_FILE);
2230
2677
  if (!existsSync4(filePath)) return null;
2231
- const raw = await readFile12(filePath, "utf-8");
2678
+ const raw = await readFile16(filePath, "utf-8");
2232
2679
  return JSON.parse(raw);
2233
2680
  }
2234
2681
  async function writeEmbeddingStore(root, store) {
2235
- const filePath = path17.join(root, EMBEDDINGS_FILE);
2682
+ const filePath = path22.join(root, EMBEDDINGS_FILE);
2236
2683
  await atomicWrite(filePath, JSON.stringify(store, null, 2));
2237
2684
  }
2238
2685
  async function findRelevantPages(root, question) {
@@ -2264,10 +2711,10 @@ async function loadActiveStore(root, hasContent) {
2264
2711
  async function collectPageRecords(root) {
2265
2712
  const records = [];
2266
2713
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
2267
- const absDir = path17.join(root, dir);
2714
+ const absDir = path22.join(root, dir);
2268
2715
  let files;
2269
2716
  try {
2270
- files = await readdir5(absDir);
2717
+ files = await readdir6(absDir);
2271
2718
  } catch {
2272
2719
  continue;
2273
2720
  }
@@ -2279,7 +2726,7 @@ async function collectPageRecords(root) {
2279
2726
  return records;
2280
2727
  }
2281
2728
  async function readPageRecord(absDir, file) {
2282
- const content = await safeReadFile(path17.join(absDir, file));
2729
+ const content = await safeReadFile(path22.join(absDir, file));
2283
2730
  const { meta, body } = parseFrontmatter(content);
2284
2731
  if (meta.orphaned || typeof meta.title !== "string") return null;
2285
2732
  return {
@@ -2441,9 +2888,9 @@ function shouldRunEmbedding(modelChanged, toEmbed, previousEntries, previousChun
2441
2888
  }
2442
2889
 
2443
2890
  // src/compiler/candidates.ts
2444
- import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
2891
+ import { readdir as readdir7, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
2445
2892
  import { existsSync as existsSync5 } from "fs";
2446
- import path18 from "path";
2893
+ import path23 from "path";
2447
2894
  import { randomBytes } from "crypto";
2448
2895
  var ID_SUFFIX_BYTES = 4;
2449
2896
  var CANDIDATE_EXT = ".json";
@@ -2452,10 +2899,10 @@ function buildCandidateId(slug) {
2452
2899
  return `${slug}-${suffix}`;
2453
2900
  }
2454
2901
  function candidatePath(root, id) {
2455
- return path18.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
2902
+ return path23.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
2456
2903
  }
2457
2904
  function archivePath(root, id) {
2458
- return path18.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
2905
+ return path23.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
2459
2906
  }
2460
2907
  async function writeCandidate(root, draft) {
2461
2908
  const candidate = {
@@ -2467,7 +2914,8 @@ async function writeCandidate(root, draft) {
2467
2914
  body: draft.body,
2468
2915
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
2469
2916
  ...draft.sourceStates ? { sourceStates: draft.sourceStates } : {},
2470
- ...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {}
2917
+ ...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {},
2918
+ ...draft.provenanceViolations ? { provenanceViolations: draft.provenanceViolations } : {}
2471
2919
  };
2472
2920
  await atomicWrite(candidatePath(root, candidate.id), JSON.stringify(candidate, null, 2));
2473
2921
  return candidate;
@@ -2506,9 +2954,9 @@ function isValidCandidate(value) {
2506
2954
  return typeof candidate.id === "string" && typeof candidate.title === "string" && typeof candidate.slug === "string" && typeof candidate.body === "string" && Array.isArray(candidate.sources);
2507
2955
  }
2508
2956
  async function listCandidates(root) {
2509
- const dir = path18.join(root, CANDIDATES_DIR);
2957
+ const dir = path23.join(root, CANDIDATES_DIR);
2510
2958
  if (!existsSync5(dir)) return [];
2511
- const entries = await readdir6(dir, { withFileTypes: true });
2959
+ const entries = await readdir7(dir, { withFileTypes: true });
2512
2960
  const candidates = [];
2513
2961
  for (const entry of entries) {
2514
2962
  if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
@@ -2533,7 +2981,7 @@ async function archiveCandidate(root, id) {
2533
2981
  const sourcePath = candidatePath(root, id);
2534
2982
  if (!existsSync5(sourcePath)) return false;
2535
2983
  const target = archivePath(root, id);
2536
- await mkdir5(path18.dirname(target), { recursive: true });
2984
+ await mkdir5(path23.dirname(target), { recursive: true });
2537
2985
  try {
2538
2986
  await rename3(sourcePath, target);
2539
2987
  } catch {
@@ -2545,9 +2993,9 @@ async function archiveCandidate(root, id) {
2545
2993
  }
2546
2994
 
2547
2995
  // src/linter/rules.ts
2548
- import { readdir as readdir7, readFile as readFile13 } from "fs/promises";
2996
+ import { readdir as readdir8, readFile as readFile17 } from "fs/promises";
2549
2997
  import { existsSync as existsSync6 } from "fs";
2550
- import path19 from "path";
2998
+ import path24 from "path";
2551
2999
  var MIN_BODY_LENGTH = 50;
2552
3000
  var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g;
2553
3001
  var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
@@ -2564,26 +3012,26 @@ function findMatchesInContent(content, pattern) {
2564
3012
  }
2565
3013
  async function readMarkdownFiles(dirPath) {
2566
3014
  if (!existsSync6(dirPath)) return [];
2567
- const entries = await readdir7(dirPath);
3015
+ const entries = await readdir8(dirPath);
2568
3016
  const mdFiles = entries.filter((f) => f.endsWith(".md"));
2569
3017
  const results = await Promise.all(
2570
3018
  mdFiles.map(async (fileName) => {
2571
- const filePath = path19.join(dirPath, fileName);
2572
- const content = await readFile13(filePath, "utf-8");
3019
+ const filePath = path24.join(dirPath, fileName);
3020
+ const content = await readFile17(filePath, "utf-8");
2573
3021
  return { filePath, content };
2574
3022
  })
2575
3023
  );
2576
3024
  return results;
2577
3025
  }
2578
3026
  async function collectAllPages(root) {
2579
- const conceptPages = await readMarkdownFiles(path19.join(root, CONCEPTS_DIR));
2580
- const queryPages = await readMarkdownFiles(path19.join(root, QUERIES_DIR));
3027
+ const conceptPages = await readMarkdownFiles(path24.join(root, CONCEPTS_DIR));
3028
+ const queryPages = await readMarkdownFiles(path24.join(root, QUERIES_DIR));
2581
3029
  return [...conceptPages, ...queryPages];
2582
3030
  }
2583
3031
  function buildPageSlugSet(pages) {
2584
3032
  const slugs = /* @__PURE__ */ new Set();
2585
3033
  for (const page of pages) {
2586
- const baseName = path19.basename(page.filePath, ".md");
3034
+ const baseName = path24.basename(page.filePath, ".md");
2587
3035
  slugs.add(baseName.toLowerCase());
2588
3036
  }
2589
3037
  return slugs;
@@ -2730,9 +3178,8 @@ async function checkInferredWithoutCitations(root) {
2730
3178
  const pages = await collectAllPages(root);
2731
3179
  const results = [];
2732
3180
  for (const page of pages) {
2733
- const { meta, body } = parseFrontmatter(page.content);
2734
- const provenance = parseProvenanceMetadata(meta);
2735
- const inferred = provenance.inferredParagraphs ?? countUncitedProseParagraphs(body);
3181
+ const { body } = parseFrontmatter(page.content);
3182
+ const inferred = countUncitedProseParagraphs(body);
2736
3183
  if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) continue;
2737
3184
  results.push({
2738
3185
  rule: "excess-inferred-paragraphs",
@@ -2743,7 +3190,7 @@ async function checkInferredWithoutCitations(root) {
2743
3190
  }
2744
3191
  return results;
2745
3192
  }
2746
- var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
3193
+ var PROSE_PARAGRAPH_LEAD = new RegExp("^\\p{L}", "u");
2747
3194
  function countUncitedProseParagraphs(body) {
2748
3195
  const paragraphs = body.split(/\n\s*\n/);
2749
3196
  let count = 0;
@@ -2766,18 +3213,7 @@ async function checkSchemaCrossLinks(root, schema) {
2766
3213
  const pages = await collectAllPages(root);
2767
3214
  const results = [];
2768
3215
  for (const page of pages) {
2769
- const { meta, body } = parseFrontmatter(page.content);
2770
- const kind = resolvePageKind(meta.kind, schema);
2771
- const rule = schema.kinds[kind];
2772
- if (rule.minWikilinks <= 0) continue;
2773
- const linkCount = countWikilinks(body);
2774
- if (linkCount >= rule.minWikilinks) continue;
2775
- results.push({
2776
- rule: "schema-cross-link-minimum",
2777
- severity: "warning",
2778
- file: page.filePath,
2779
- message: `Page kind "${kind}" requires at least ${rule.minWikilinks} [[wikilinks]] but only ${linkCount} found.`
2780
- });
3216
+ results.push(...checkPageCrossLinks(page.content, page.filePath, schema));
2781
3217
  }
2782
3218
  return results;
2783
3219
  }
@@ -2818,13 +3254,24 @@ function countLines(content) {
2818
3254
  }
2819
3255
  async function checkBrokenCitations(root) {
2820
3256
  const pages = await collectAllPages(root);
2821
- const sourcesDir = path19.join(root, SOURCES_DIR);
3257
+ const sourcesDir = path24.join(root, SOURCES_DIR);
2822
3258
  const results = [];
2823
3259
  const lineCountCache = /* @__PURE__ */ new Map();
2824
3260
  for (const page of pages) {
2825
- for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
2826
- await collectBrokenForMarker(captured, line, page.filePath, sourcesDir, lineCountCache, results);
2827
- }
3261
+ const pageFindings = await checkPageBrokenCitations(
3262
+ page.content,
3263
+ page.filePath,
3264
+ sourcesDir,
3265
+ lineCountCache
3266
+ );
3267
+ results.push(...pageFindings);
3268
+ }
3269
+ return results;
3270
+ }
3271
+ async function checkPageBrokenCitations(content, filePath, sourcesDir, lineCountCache = /* @__PURE__ */ new Map()) {
3272
+ const results = [];
3273
+ for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
3274
+ await collectBrokenForMarker(captured, line, filePath, sourcesDir, lineCountCache, results);
2828
3275
  }
2829
3276
  return results;
2830
3277
  }
@@ -2833,7 +3280,7 @@ async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, line
2833
3280
  const trimmed = part.trim();
2834
3281
  if (trimmed.length === 0) continue;
2835
3282
  const filename = stripSpanSuffix(trimmed);
2836
- const citedPath = path19.join(sourcesDir, filename);
3283
+ const citedPath = path24.join(sourcesDir, filename);
2837
3284
  if (!existsSync6(citedPath)) {
2838
3285
  out.push({
2839
3286
  rule: "broken-citation",
@@ -2869,25 +3316,30 @@ async function checkMalformedClaimCitations(root) {
2869
3316
  const pages = await collectAllPages(root);
2870
3317
  const results = [];
2871
3318
  for (const page of pages) {
2872
- for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
2873
- for (const part of captured.split(",")) {
2874
- if (!isMalformedCitationEntry(part)) continue;
2875
- results.push({
2876
- rule: "malformed-claim-citation",
2877
- severity: "error",
2878
- file: page.filePath,
2879
- message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
2880
- line
2881
- });
2882
- }
3319
+ results.push(...checkPageMalformedCitations(page.content, page.filePath));
3320
+ }
3321
+ return results;
3322
+ }
3323
+ function checkPageMalformedCitations(content, filePath) {
3324
+ const results = [];
3325
+ for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
3326
+ for (const part of captured.split(",")) {
3327
+ if (!isMalformedCitationEntry(part)) continue;
3328
+ results.push({
3329
+ rule: "malformed-claim-citation",
3330
+ severity: "error",
3331
+ file: filePath,
3332
+ message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
3333
+ line
3334
+ });
2883
3335
  }
2884
3336
  }
2885
3337
  return results;
2886
3338
  }
2887
3339
 
2888
3340
  // src/compiler/page-renderer.ts
2889
- import { readdir as readdir8 } from "fs/promises";
2890
- import path20 from "path";
3341
+ import { readdir as readdir9 } from "fs/promises";
3342
+ import path25 from "path";
2891
3343
 
2892
3344
  // src/compiler/provenance.ts
2893
3345
  function addProvenanceMeta(fields, concept) {
@@ -2900,9 +3352,6 @@ function addProvenanceMeta(fields, concept) {
2900
3352
  if (concept.contradictedBy && concept.contradictedBy.length > 0) {
2901
3353
  fields.contradictedBy = concept.contradictedBy;
2902
3354
  }
2903
- if (typeof concept.inferredParagraphs === "number") {
2904
- fields.inferredParagraphs = concept.inferredParagraphs;
2905
- }
2906
3355
  }
2907
3356
  function reportContradictionWarnings(conceptTitle, concept) {
2908
3357
  const refs = concept.contradictedBy;
@@ -2917,7 +3366,7 @@ function reportContradictionWarnings(conceptTitle, concept) {
2917
3366
  // src/compiler/page-renderer.ts
2918
3367
  var RELATED_PAGE_CONTEXT_LIMIT = 5;
2919
3368
  async function renderMergedPageContent(root, entry, schema) {
2920
- const pagePath = path20.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
3369
+ const pagePath = path25.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
2921
3370
  const existingPage = await safeReadFile(pagePath);
2922
3371
  const relatedPages = await loadRelatedPages(root, entry.slug);
2923
3372
  const system = buildPagePrompt(
@@ -2956,17 +3405,17 @@ function buildMergedFrontmatter(entry, existingPage, schema) {
2956
3405
  return buildFrontmatter(frontmatterFields);
2957
3406
  }
2958
3407
  async function loadRelatedPages(root, excludeSlug) {
2959
- const conceptsPath = path20.join(root, CONCEPTS_DIR);
3408
+ const conceptsPath = path25.join(root, CONCEPTS_DIR);
2960
3409
  let files;
2961
3410
  try {
2962
- files = await readdir8(conceptsPath);
3411
+ files = await readdir9(conceptsPath);
2963
3412
  } catch {
2964
3413
  return "";
2965
3414
  }
2966
3415
  const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, RELATED_PAGE_CONTEXT_LIMIT);
2967
3416
  const contents = [];
2968
3417
  for (const f of related) {
2969
- const content = await safeReadFile(path20.join(conceptsPath, f));
3418
+ const content = await safeReadFile(path25.join(conceptsPath, f));
2970
3419
  if (!content) continue;
2971
3420
  const { meta } = parseFrontmatter(content);
2972
3421
  if (meta.orphaned) continue;
@@ -3020,7 +3469,7 @@ async function generatePagesPhase(root, extractions, frozenSlugs, schema, option
3020
3469
  return entry;
3021
3470
  }))
3022
3471
  );
3023
- return { pages, errors, candidates };
3472
+ return { pages, errors, candidates, seedSlugs: [] };
3024
3473
  }
3025
3474
  async function persistExtractionStates(root, extractions) {
3026
3475
  for (const result of extractions) {
@@ -3046,12 +3495,13 @@ function summarizeCompile(buckets, generation, extractions, options) {
3046
3495
  errors.push(`No concepts extracted from ${result.sourceFile}`);
3047
3496
  }
3048
3497
  }
3498
+ const conceptSlugs = generation.pages.map((entry) => entry.slug);
3049
3499
  const baseResult = {
3050
3500
  compiled: buckets.toCompile.length,
3051
3501
  skipped: buckets.unchanged.length,
3052
3502
  deleted: buckets.deleted.length,
3053
3503
  concepts: generation.pages.map((entry) => entry.concept.concept),
3054
- pages: generation.pages.map((entry) => entry.slug),
3504
+ pages: [...conceptSlugs, ...generation.seedSlugs],
3055
3505
  errors
3056
3506
  };
3057
3507
  if (options.review) {
@@ -3069,12 +3519,21 @@ async function runCompilePipeline(root, options) {
3069
3519
  if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
3070
3520
  status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
3071
3521
  if (!options.review) {
3072
- const emptyGeneration = { pages: [], errors: [], candidates: [] };
3522
+ const emptyGeneration = {
3523
+ pages: [],
3524
+ errors: [],
3525
+ candidates: [],
3526
+ seedSlugs: []
3527
+ };
3073
3528
  await generateSeedPages(root, schema, emptyGeneration);
3074
- await finalizeWiki(root, emptyGeneration.pages);
3529
+ await finalizeWiki(root, emptyGeneration.pages, emptyGeneration.seedSlugs);
3075
3530
  return {
3076
3531
  ...emptyCompileResult(),
3077
3532
  skipped: buckets.unchanged.length,
3533
+ // Surface seed-page slugs alongside any errors so downstream
3534
+ // consumers (MCP, embeddings, programmatic callers) can see what
3535
+ // landed even on the no-source-changes early-return path.
3536
+ pages: [...emptyGeneration.seedSlugs],
3078
3537
  errors: emptyGeneration.errors
3079
3538
  };
3080
3539
  }
@@ -3098,7 +3557,7 @@ async function runCompilePipeline(root, options) {
3098
3557
  }
3099
3558
  await persistFrozenSlugs(root, frozenSlugs, extractions);
3100
3559
  await generateSeedPages(root, schema, generation);
3101
- await finalizeWiki(root, generation.pages);
3560
+ await finalizeWiki(root, generation.pages, generation.seedSlugs);
3102
3561
  }
3103
3562
  return summarizeCompile(buckets, generation, extractions, options);
3104
3563
  }
@@ -3135,9 +3594,11 @@ async function runExtractionPhases(root, toCompile, state, allChanges) {
3135
3594
  }
3136
3595
  return extractions;
3137
3596
  }
3138
- async function finalizeWiki(root, pages) {
3139
- const allChangedSlugs = pages.map((entry) => entry.slug);
3140
- const allNewSlugs = pages.filter((entry) => entry.concept.is_new).map((entry) => entry.slug);
3597
+ async function finalizeWiki(root, pages, seedSlugs = []) {
3598
+ const conceptChangedSlugs = pages.map((entry) => entry.slug);
3599
+ const conceptNewSlugs = pages.filter((entry) => entry.concept.is_new).map((entry) => entry.slug);
3600
+ const allChangedSlugs = [...conceptChangedSlugs, ...seedSlugs];
3601
+ const allNewSlugs = [...conceptNewSlugs, ...seedSlugs];
3141
3602
  if (allChangedSlugs.length > 0) {
3142
3603
  status("\u{1F517}", info("Resolving interlinks..."));
3143
3604
  await resolveLinks(root, allChangedSlugs, allNewSlugs);
@@ -3167,9 +3628,9 @@ function printChangesSummary(changes) {
3167
3628
  }
3168
3629
  async function extractForSource(root, sourceFile) {
3169
3630
  status("*", info(`Extracting: ${sourceFile}`));
3170
- const sourcePath = path21.join(root, SOURCES_DIR, sourceFile);
3171
- const sourceContent = await readFile14(sourcePath, "utf-8");
3172
- const existingIndex = await safeReadFile(path21.join(root, INDEX_FILE));
3631
+ const sourcePath = path26.join(root, SOURCES_DIR, sourceFile);
3632
+ const sourceContent = await readFile18(sourcePath, "utf-8");
3633
+ const existingIndex = await safeReadFile(path26.join(root, INDEX_FILE));
3173
3634
  const concepts = await extractConcepts(sourceContent, existingIndex);
3174
3635
  if (concepts.length > 0) {
3175
3636
  const names = concepts.map((c) => c.concept).join(", ");
@@ -3192,13 +3653,11 @@ function reconcileConceptMetadata(existing, incoming) {
3192
3653
  }
3193
3654
  }
3194
3655
  reconciled.contradictedBy = refs.length > 0 ? refs : void 0;
3195
- if (typeof incoming.inferredParagraphs === "number") {
3196
- reconciled.inferredParagraphs = typeof existing.inferredParagraphs === "number" ? Math.max(existing.inferredParagraphs, incoming.inferredParagraphs) : incoming.inferredParagraphs;
3197
- }
3198
3656
  return reconciled;
3199
3657
  }
3200
3658
  function mergeExtractions(extractions, frozenSlugs) {
3201
3659
  const bySlug = /* @__PURE__ */ new Map();
3660
+ const slicesBySlug = /* @__PURE__ */ new Map();
3202
3661
  for (const result of extractions) {
3203
3662
  if (result.concepts.length === 0) continue;
3204
3663
  for (const concept of result.concepts) {
@@ -3208,23 +3667,28 @@ function mergeExtractions(extractions, frozenSlugs) {
3208
3667
  if (existing) {
3209
3668
  existing.concept = reconcileConceptMetadata(existing.concept, concept);
3210
3669
  existing.sourceFiles.push(result.sourceFile);
3211
- existing.combinedContent += `
3212
-
3213
- --- SOURCE: ${result.sourceFile} ---
3214
-
3215
- ${result.sourceContent}`;
3216
3670
  } else {
3217
3671
  bySlug.set(slug, {
3218
3672
  slug,
3219
3673
  concept,
3220
3674
  sourceFiles: [result.sourceFile],
3221
- combinedContent: `--- SOURCE: ${result.sourceFile} ---
3222
-
3223
- ${result.sourceContent}`
3675
+ combinedContent: ""
3224
3676
  });
3677
+ slicesBySlug.set(slug, []);
3225
3678
  }
3679
+ slicesBySlug.get(slug).push({
3680
+ file: result.sourceFile,
3681
+ content: result.sourceContent
3682
+ });
3226
3683
  }
3227
3684
  }
3685
+ for (const merged of bySlug.values()) {
3686
+ const slices = slicesBySlug.get(merged.slug) ?? [];
3687
+ merged.combinedContent = buildBudgetedCombinedContent(
3688
+ merged.concept.concept,
3689
+ slices
3690
+ );
3691
+ }
3228
3692
  return Array.from(bySlug.values());
3229
3693
  }
3230
3694
  async function generateMergedPage(root, entry, schema, options, sourceStates) {
@@ -3232,13 +3696,18 @@ async function generateMergedPage(root, entry, schema, options, sourceStates) {
3232
3696
  if (options.review) {
3233
3697
  return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
3234
3698
  }
3235
- const pagePath = path21.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
3699
+ const pagePath = path26.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
3236
3700
  const error2 = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
3237
3701
  return { error: error2 ?? void 0 };
3238
3702
  }
3239
3703
  async function persistReviewCandidate(root, entry, fullPage, sourceStates, schema) {
3240
3704
  const virtualPath = `wiki/concepts/${entry.slug}.md`;
3241
- const violations = checkPageCrossLinks(fullPage, virtualPath, schema);
3705
+ const schemaViolations = checkPageCrossLinks(fullPage, virtualPath, schema);
3706
+ const provenanceViolations = await collectCandidateProvenanceViolations(
3707
+ root,
3708
+ fullPage,
3709
+ virtualPath
3710
+ );
3242
3711
  const candidate = await writeCandidate(root, {
3243
3712
  title: entry.concept.concept,
3244
3713
  slug: entry.slug,
@@ -3246,21 +3715,35 @@ async function persistReviewCandidate(root, entry, fullPage, sourceStates, schem
3246
3715
  sources: entry.sourceFiles,
3247
3716
  body: fullPage,
3248
3717
  sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
3249
- schemaViolations: violations.length > 0 ? violations : void 0
3718
+ schemaViolations: schemaViolations.length > 0 ? schemaViolations : void 0,
3719
+ provenanceViolations: provenanceViolations.length > 0 ? provenanceViolations : void 0
3250
3720
  });
3251
3721
  status("?", info(`Candidate ready: ${candidate.id} (${entry.slug})`));
3252
3722
  return { candidateId: candidate.id };
3253
3723
  }
3724
/**
 * Collect citation-related provenance findings for a review candidate.
 *
 * Runs the malformed-citation check first, then awaits the broken-citation
 * check against the sources directory located under `root`.
 *
 * @param {string} root - Project root directory.
 * @param {string} fullPage - Complete candidate page text handed to both checkers.
 * @param {string} virtualPath - Path label passed through to both checkers.
 * @returns {Promise<Array>} Malformed-citation findings followed by broken-citation findings.
 */
async function collectCandidateProvenanceViolations(root, fullPage, virtualPath) {
  const malformedCitations = checkPageMalformedCitations(fullPage, virtualPath);
  const sourcesDir = path26.join(root, SOURCES_DIR);
  const brokenCitations = await checkPageBrokenCitations(fullPage, virtualPath, sourcesDir);
  return [...malformedCitations, ...brokenCitations];
}
3254
3733
/**
 * Generate every seed page declared by the schema and record the outcome
 * on the shared `generation` accumulator.
 *
 * A seed whose generation reports an error contributes that error to
 * `generation.errors`; every other seed contributes its slug to
 * `generation.seedSlugs`.
 *
 * @param {string} root - Project root directory.
 * @param {object} schema - Schema object; `schema.seedPages` lists the seeds.
 * @param {object} generation - Mutable accumulator with `errors` and `seedSlugs` arrays.
 * @returns {Promise<void>}
 */
async function generateSeedPages(root, schema, generation) {
  const { seedPages } = schema;
  if (seedPages.length === 0) return;
  for (const seed of seedPages) {
    const { error: seedError, slug } = await generateSingleSeedPage(root, schema, seed);
    if (seedError) {
      generation.errors.push(seedError);
    } else {
      generation.seedSlugs.push(slug);
    }
  }
}
3261
3744
  async function generateSingleSeedPage(root, schema, seed) {
3262
3745
  const slug = slugify(seed.title);
3263
- const pagePath = path21.join(root, CONCEPTS_DIR, `${slug}.md`);
3746
+ const pagePath = path26.join(root, CONCEPTS_DIR, `${slug}.md`);
3264
3747
  const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
3265
3748
  const rule = schema.kinds[seed.kind];
3266
3749
  const system = buildSeedPagePrompt(seed, rule, relatedContent);
@@ -3283,16 +3766,17 @@ async function generateSingleSeedPage(root, schema, seed) {
3283
3766
  const frontmatterFields = { ...typedFields };
3284
3767
  addObsidianMeta(frontmatterFields, seed.title, []);
3285
3768
  const frontmatter = buildFrontmatter(frontmatterFields);
3286
- return await writePageIfValid(pagePath, `${frontmatter}
3769
+ const error2 = await writePageIfValid(pagePath, `${frontmatter}
3287
3770
 
3288
3771
  ${pageBody}
3289
3772
  `, seed.title);
3773
+ return error2 ? { slug, error: error2 } : { slug };
3290
3774
  }
3291
3775
  async function loadSeedRelatedPages(root, slugs) {
3292
3776
  if (slugs.length === 0) return "";
3293
3777
  const contents = [];
3294
3778
  for (const slug of slugs) {
3295
- const pagePath = path21.join(root, CONCEPTS_DIR, `${slug}.md`);
3779
+ const pagePath = path26.join(root, CONCEPTS_DIR, `${slug}.md`);
3296
3780
  const content = await safeReadFile(pagePath);
3297
3781
  if (content) contents.push(content);
3298
3782
  }
@@ -3347,7 +3831,7 @@ async function compileCommand(options = {}) {
3347
3831
 
3348
3832
  // src/commands/query.ts
3349
3833
  import { existsSync as existsSync8 } from "fs";
3350
- import path22 from "path";
3834
+ import path27 from "path";
3351
3835
  var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
3352
3836
  var PAGE_SELECTION_TOOL = {
3353
3837
  name: "select_pages",
@@ -3404,7 +3888,7 @@ async function selectRelevantPages(root, question, debug) {
3404
3888
  const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
3405
3889
  return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2, chunks: [] };
3406
3890
  }
3407
- const indexContent = await safeReadFile(path22.join(root, INDEX_FILE));
3891
+ const indexContent = await safeReadFile(path27.join(root, INDEX_FILE));
3408
3892
  const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
3409
3893
  return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning, chunks: [] };
3410
3894
  }
@@ -3496,7 +3980,7 @@ async function loadSelectedPages(root, slugs) {
3496
3980
  for (const slug of slugs) {
3497
3981
  let content = "";
3498
3982
  for (const dir of PAGE_DIRS) {
3499
- const candidate = await safeReadFile(path22.join(root, dir, `${slug}.md`));
3983
+ const candidate = await safeReadFile(path27.join(root, dir, `${slug}.md`));
3500
3984
  if (!candidate) continue;
3501
3985
  const { meta } = parseFrontmatter(candidate);
3502
3986
  if (meta.orphaned) continue;
@@ -3512,7 +3996,11 @@ ${content}`);
3512
3996
  }
3513
3997
  return sections.join("\n\n");
3514
3998
  }
3515
- var ANSWER_SYSTEM_PROMPT = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
3999
// Base instructions for the answer model; a language directive may be appended.
var ANSWER_SYSTEM_PROMPT_BASE = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
/**
 * Build the system prompt used when answering a question.
 *
 * @returns {string} The base prompt, followed by a space and the language
 *   directive when `languageDirective()` returns a non-empty value.
 */
function buildAnswerSystemPrompt() {
  const directive = languageDirective();
  if (!directive) return ANSWER_SYSTEM_PROMPT_BASE;
  return `${ANSWER_SYSTEM_PROMPT_BASE} ${directive}`;
}
3516
4004
  async function callAnswerLLM(question, pagesContent, chunks, onToken) {
3517
4005
  const provenance = chunks.length > 0 ? buildChunkProvenance(chunks) : "";
3518
4006
  const userMessage = `Question: ${question}
@@ -3520,7 +4008,7 @@ async function callAnswerLLM(question, pagesContent, chunks, onToken) {
3520
4008
  Relevant wiki pages:
3521
4009
  ${pagesContent}${provenance}`;
3522
4010
  return callClaude({
3523
- system: ANSWER_SYSTEM_PROMPT,
4011
+ system: buildAnswerSystemPrompt(),
3524
4012
  messages: [{ role: "user", content: userMessage }],
3525
4013
  stream: Boolean(onToken),
3526
4014
  onToken
@@ -3543,7 +4031,7 @@ function summarizeAnswer(answer) {
3543
4031
  }
3544
4032
  async function saveQueryPage(root, question, answer) {
3545
4033
  const slug = slugify(question);
3546
- const filePath = path22.join(root, QUERIES_DIR, `${slug}.md`);
4034
+ const filePath = path27.join(root, QUERIES_DIR, `${slug}.md`);
3547
4035
  const frontmatter = buildFrontmatter({
3548
4036
  title: question,
3549
4037
  summary: summarizeAnswer(answer),
@@ -3569,7 +4057,7 @@ ${answer}
3569
4057
  return slug;
3570
4058
  }
3571
4059
  async function generateAnswer(root, question, options = {}) {
3572
- if (!existsSync8(path22.join(root, INDEX_FILE))) {
4060
+ if (!existsSync8(path27.join(root, INDEX_FILE))) {
3573
4061
  throw new Error("Wiki index not found. Run `llmwiki compile` first.");
3574
4062
  }
3575
4063
  const selection = await selectRelevantPages(root, question, Boolean(options.debug));
@@ -3597,7 +4085,7 @@ function buildEmptyResult(selection) {
3597
4085
  };
3598
4086
  }
3599
4087
  async function queryCommand(root, question, options) {
3600
- if (!existsSync8(path22.join(root, INDEX_FILE))) {
4088
+ if (!existsSync8(path27.join(root, INDEX_FILE))) {
3601
4089
  status("!", error("Wiki index not found. Run `llmwiki compile` first."));
3602
4090
  return;
3603
4091
  }
@@ -3648,10 +4136,10 @@ var DEBUG_CHUNK_PREVIEW_CHARS = 120;
3648
4136
  // src/commands/watch.ts
3649
4137
  import { watch as chokidarWatch } from "chokidar";
3650
4138
  import { existsSync as existsSync9 } from "fs";
3651
- import path23 from "path";
4139
+ import path28 from "path";
3652
4140
  var DEBOUNCE_MS = 500;
3653
4141
  async function watchCommand() {
3654
- const sourcesPath = path23.resolve(SOURCES_DIR);
4142
+ const sourcesPath = path28.resolve(SOURCES_DIR);
3655
4143
  if (!existsSync9(sourcesPath)) {
3656
4144
  status(
3657
4145
  "!",
@@ -3686,7 +4174,7 @@ async function watchCommand() {
3686
4174
  const scheduleCompile = (eventPath, event) => {
3687
4175
  status(
3688
4176
  "~",
3689
- dim(`${event}: ${path23.basename(eventPath)}`)
4177
+ dim(`${event}: ${path28.basename(eventPath)}`)
3690
4178
  );
3691
4179
  if (debounceTimer) clearTimeout(debounceTimer);
3692
4180
  debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
@@ -3770,10 +4258,388 @@ async function lintCommand() {
3770
4258
  }
3771
4259
  }
3772
4260
 
4261
+ // src/commands/export.ts
4262
+ import path30 from "path";
4263
+ import { createRequire } from "module";
4264
+
4265
+ // src/export/collect.ts
4266
+ import { readdir as readdir10, readFile as readFile19 } from "fs/promises";
4267
+ import path29 from "path";
4268
// Matches [[Target]] and [[Target|Label]]; capture group 1 is the link target.
var WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
/**
 * Extract the unique slugs of all wikilinks found in a page body.
 *
 * Uses `matchAll`, which iterates over a private copy of the regex, so the
 * shared global pattern's `lastIndex` is never advanced and an exception
 * thrown mid-scan cannot corrupt the next call.
 *
 * @param {string} body - Markdown body of a page.
 * @returns {string[]} De-duplicated slugs in first-seen order. Targets whose
 *   slug is empty (for example a link made only of punctuation) are skipped,
 *   because an empty slug cannot identify a page.
 */
function extractWikilinkSlugs(body) {
  const slugs = new Set();
  for (const [, target] of body.matchAll(WIKILINK_RE)) {
    const slug = slugify(target.trim());
    if (slug) slugs.add(slug);
  }
  return [...slugs];
}
4277
/**
 * Read one wiki page from disk and convert it into an export record.
 *
 * @param {string} filePath - Path of the markdown file to read.
 * @param {string} slug - Slug recorded on the resulting page.
 * @param {string} pageDirectory - Directory label recorded on the resulting page.
 * @returns {Promise<object|null>} The export record, or `null` when the file
 *   cannot be read, has no string `title`, or is flagged `orphaned: true`.
 */
async function parsePageFile(filePath, slug, pageDirectory) {
  let raw;
  try {
    raw = await readFile19(filePath, "utf-8");
  } catch {
    // Best-effort export: an unreadable file is skipped rather than aborting the run.
    return null;
  }
  const { meta, body } = parseFrontmatter(raw);
  if (!meta.title || typeof meta.title !== "string") return null;
  if (meta.orphaned === true) return null;
  // One fallback timestamp so createdAt and updatedAt cannot differ by a
  // millisecond when both are missing.
  const now = new Date().toISOString();
  const stringsOnly = (value) => Array.isArray(value) ? value.filter((item) => typeof item === "string") : [];
  // NOTE(review): presumably parseFrontmatter is backed by js-yaml, whose
  // default schema turns unquoted timestamps into Date objects — confirm.
  // Accepting valid Dates keeps hand-written dates instead of replacing them
  // with the export time.
  const toTimestamp = (value) => {
    if (typeof value === "string") return value;
    if (value instanceof Date && !Number.isNaN(value.getTime())) return value.toISOString();
    return now;
  };
  return {
    title: meta.title,
    slug,
    pageDirectory,
    summary: typeof meta.summary === "string" ? meta.summary : "",
    sources: stringsOnly(meta.sources),
    tags: stringsOnly(meta.tags),
    createdAt: toTimestamp(meta.createdAt),
    updatedAt: toTimestamp(meta.updatedAt),
    links: extractWikilinkSlugs(body),
    body
  };
}
4300
/**
 * Parse every `.md` file in a directory into export records.
 *
 * @param {string} dirPath - Directory to scan.
 * @param {string} pageDirectory - Directory label attached to each record.
 * @returns {Promise<object[]>} Records for the pages that parsed successfully;
 *   an empty array when the directory cannot be listed.
 */
async function collectFromDir(dirPath, pageDirectory) {
  let entries;
  try {
    entries = await readdir10(dirPath);
  } catch {
    return [];
  }
  const collected = [];
  for (const fileName of entries) {
    if (!fileName.endsWith(".md")) continue;
    // The slug is the file name without its ".md" suffix.
    const slug = fileName.slice(0, -".md".length);
    const parsed = await parsePageFile(path29.join(dirPath, fileName), slug, pageDirectory);
    if (parsed) collected.push(parsed);
  }
  return collected;
}
4315
/**
 * Collect all exportable pages (concepts and saved queries) under `root`.
 *
 * Both directories are scanned concurrently; the combined list is sorted by
 * title using `localeCompare`.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} Concept and query records, ordered by title.
 */
async function collectExportPages(root) {
  const conceptsDir = path29.join(root, CONCEPTS_DIR);
  const queriesDir = path29.join(root, QUERIES_DIR);
  const perDirectory = await Promise.all([
    collectFromDir(conceptsDir, "concepts"),
    collectFromDir(queriesDir, "queries")
  ]);
  const combined = perDirectory.flat();
  combined.sort((left, right) => left.title.localeCompare(right.title));
  return combined;
}
4326
+
4327
+ // src/export/llms-txt.ts
4328
/**
 * Build the wiki-relative markdown path of a page.
 *
 * @param {{pageDirectory: string, slug: string}} page - Export record.
 * @returns {string} Path of the form `wiki/<pageDirectory>/<slug>.md`.
 */
function pageRelativePath(page) {
  const { pageDirectory, slug } = page;
  return "wiki/".concat(pageDirectory, "/", slug, ".md");
}
4331
/**
 * Build the one-line note shown after a page link in the llms.txt index.
 *
 * @param {object} page - Export record with `summary`, `tags`, `sources`,
 *   `createdAt` and `updatedAt`.
 * @returns {string} Summary, tags and sources (each only when present)
 *   followed by the created/updated stamps, joined with " | ".
 */
function buildEntryNote(page) {
  const { summary, tags, sources, createdAt, updatedAt } = page;
  const segments = [
    summary || null,
    tags.length > 0 ? `tags: ${tags.join(", ")}` : null,
    sources.length > 0 ? `sources: ${sources.join(", ")}` : null,
    `created: ${createdAt}`,
    `updated: ${updatedAt}`
  ];
  // Optional segments that are absent were recorded as null above.
  return segments.filter((segment) => segment !== null).join(" | ");
}
4340
/**
 * Format a page as one markdown list item for the llms.txt index.
 *
 * @param {object} page - Export record.
 * @returns {string} `- [title](relative path): note`.
 */
function formatPageEntry(page) {
  const entryNote = buildEntryNote(page);
  const link = `[${page.title}](${pageRelativePath(page)})`;
  return `- ${link}: ${entryNote}`;
}
4344
/**
 * Build the lines of one llms.txt section.
 *
 * @param {string} heading - Section heading text.
 * @param {object[]} pages - Pages listed in the section.
 * @returns {string[]} An empty array when there are no pages; otherwise the
 *   `##` heading, a blank line, one entry per page and a trailing blank line.
 */
function buildSection(heading, pages) {
  if (pages.length === 0) return [];
  const lines = [`## ${heading}`, ""];
  for (const page of pages) {
    lines.push(formatPageEntry(page));
  }
  lines.push("");
  return lines;
}
4348
/**
 * Build the llms.txt index document.
 *
 * @param {object[]} pages - All export records.
 * @param {string} projectTitle - Title used for the top-level heading.
 * @returns {string} Markdown with a title, a page-count/export-time quote and
 *   the "Concepts" and "Saved Queries" sections (each omitted when empty).
 */
function buildLlmsTxt(pages, projectTitle) {
  const inDirectory = (directory) => pages.filter((page) => page.pageDirectory === directory);
  const conceptPages = inDirectory("concepts");
  const queryPages = inDirectory("queries");
  const exportedAt = new Date().toISOString();
  const preamble = [
    `# ${projectTitle}`,
    "",
    `> ${pages.length} pages \u2014 exported ${exportedAt}`,
    ""
  ];
  return preamble.concat(
    buildSection("Concepts", conceptPages),
    buildSection("Saved Queries", queryPages)
  ).join("\n");
}
4361
/**
 * Build the llms-full.txt document: the llms.txt index followed by the full
 * body of every page.
 *
 * @param {object[]} pages - All export records.
 * @param {string} projectTitle - Title used for the index heading.
 * @returns {string} The index and one "---"-separated section per page,
 *   joined with newlines.
 */
function buildLlmsFullTxt(pages, projectTitle) {
  const index = buildLlmsTxt(pages, projectTitle);
  // Renders "\n<label>: a, b" for a non-empty list and "" otherwise.
  const labelled = (label, values) => values.length > 0 ? `\n${label}: ${values.join(", ")}` : "";
  const pageSections = pages.map((page) => {
    const quote = `> ${page.summary}${labelled("Tags", page.tags)}${labelled("Sources", page.sources)}`;
    const dates = `Created: ${page.createdAt} | Updated: ${page.updatedAt}`;
    return `---\n## ${page.title}\n${quote}\n${dates}\n\n${page.body.trim()}\n`;
  });
  return [index, ...pageSections].join("\n");
}
4381
+
4382
+ // src/export/json-export.ts
4383
/**
 * Serialize the pages as a pretty-printed JSON document.
 *
 * @param {object[]} pages - All export records.
 * @returns {string} JSON text with `exportedAt` (current ISO timestamp),
 *   `pageCount` and `pages`, indented by two spaces.
 */
function buildJsonExport(pages) {
  const exportedAt = new Date().toISOString();
  return JSON.stringify({ exportedAt, pageCount: pages.length, pages }, null, 2);
}
4391
+
4392
+ // src/export/json-ld.ts
4393
// Prefix that turns a page slug into a local identifier for JSON-LD nodes.
var LOCAL_BASE = "urn:llmwiki:";
/**
 * Build the JSON-LD identifier of a page.
 *
 * @param {string} slug - Page slug.
 * @returns {string} `LOCAL_BASE` followed by the slug.
 */
function pageIri(slug) {
  return LOCAL_BASE.concat(slug);
}
4397
/**
 * Convert an export record into a schema.org "Article" JSON-LD node.
 *
 * @param {object} page - Export record.
 * @returns {object} Node with id, type, name, description and dates;
 *   `keywords`, `isBasedOn` and `mentions` are present only when the page has
 *   tags, sources and links respectively.
 */
function pageToJsonLd(page) {
  const { tags, sources, links } = page;
  return {
    "@id": pageIri(page.slug),
    "@type": "Article",
    name: page.title,
    description: page.summary,
    dateCreated: page.createdAt,
    dateModified: page.updatedAt,
    // Optional properties are spread in so absent ones leave no key behind.
    ...(tags.length > 0 ? { keywords: tags } : {}),
    ...(sources.length > 0 ? { isBasedOn: sources } : {}),
    ...(links.length > 0 ? { mentions: links.map((slug) => ({ "@id": pageIri(slug) })) } : {})
  };
}
4417
/**
 * Serialize the pages as a JSON-LD graph document.
 *
 * @param {object[]} pages - All export records.
 * @returns {string} Pretty-printed JSON with a schema.org `@context` and one
 *   `@graph` node per page.
 */
function buildJsonLd(pages) {
  const graph = pages.map((page) => pageToJsonLd(page));
  return JSON.stringify({ "@context": "https://schema.org", "@graph": graph }, null, 2);
}
4424
+
4425
+ // src/export/graphml.ts
4426
// Replacement entity for each XML metacharacter.
var XML_ESCAPES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;"
};
/**
 * Make a string safe to embed in XML text or attribute values.
 *
 * Characters that XML 1.0 forbids outright (C0 controls other than tab, line
 * feed and carriage return, plus U+FFFE/U+FFFF) are removed first: they
 * cannot be represented even as character references, so leaving them in
 * would make the whole document malformed. The five metacharacters are then
 * replaced by their predefined entities.
 *
 * @param {string} value - Raw text.
 * @returns {string} Escaped text containing only legal XML characters.
 */
function escapeXml(value) {
  return value
    .replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\uFFFE\uFFFF]/g, "")
    .replace(/[&<>"']/g, (ch) => XML_ESCAPES[ch] ?? ch);
}
4436
// GraphML <key> declarations; one per <data> element written by pageToNode.
var KEY_DEFS = [
  '<key id="title" for="node" attr.name="title" attr.type="string"/>',
  '<key id="summary" for="node" attr.name="summary" attr.type="string"/>',
  '<key id="tags" for="node" attr.name="tags" attr.type="string"/>',
  '<key id="sources" for="node" attr.name="sources" attr.type="string"/>',
  '<key id="createdAt" for="node" attr.name="createdAt" attr.type="string"/>',
  '<key id="updatedAt" for="node" attr.name="updatedAt" attr.type="string"/>'
].join("\n ");
/**
 * Render one page as a GraphML <node> element.
 *
 * @param {object} page - Export record.
 * @returns {string} Multi-line node markup with XML-escaped values; tags and
 *   sources are flattened into comma-separated strings.
 */
function pageToNode(page) {
  const opening = ` <node id="${escapeXml(page.slug)}">`;
  const dataFields = [
    ["title", page.title],
    ["summary", page.summary],
    ["tags", page.tags.join(", ")],
    ["sources", page.sources.join(", ")],
    ["createdAt", page.createdAt],
    ["updatedAt", page.updatedAt]
  ];
  const dataLines = dataFields.map(
    ([key, value]) => ` <data key="${key}">${escapeXml(value)}</data>`
  );
  return [opening, ...dataLines, ` </node>`].join("\n");
}
4458
/**
 * Render the outgoing links of a page as GraphML <edge> elements.
 *
 * @param {object} page - Export record; `page.links` holds target slugs.
 * @param {Set<string>} knownSlugs - Slugs that exist as nodes in the graph.
 * @returns {string[]} One edge per link whose target is a known slug.
 */
function pageToEdges(page, knownSlugs) {
  const edges = [];
  for (const target of page.links) {
    // Links to pages that are not exported would reference a missing node.
    if (!knownSlugs.has(target)) continue;
    edges.push(` <edge source="${escapeXml(page.slug)}" target="${escapeXml(target)}"/>`);
  }
  return edges;
}
4463
/**
 * Build a GraphML document with one node per page and one directed edge per
 * link between exported pages.
 *
 * @param {object[]} pages - All export records.
 * @returns {string} The GraphML document text, ending with a newline.
 */
function buildGraphml(pages) {
  const knownSlugs = new Set();
  for (const page of pages) knownSlugs.add(page.slug);
  const nodeBlocks = [];
  const edgeLines = [];
  for (const page of pages) {
    nodeBlocks.push(pageToNode(page));
    edgeLines.push(...pageToEdges(page, knownSlugs));
  }
  const documentLines = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<graphml xmlns="http://graphml.graphdrawing.org/graphml"',
    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
    ' xsi:schemaLocation="http://graphml.graphdrawing.org/graphml',
    ' http://graphml.graphdrawing.org/graphml/1.0/graphml.xsd">',
    ` ${KEY_DEFS}`,
    ' <graph id="wiki" edgedefault="directed">',
    nodeBlocks.join("\n"),
    edgeLines.join("\n"),
    " </graph>",
    "</graphml>",
    ""
  ];
  return documentLines.join("\n");
}
4482
+
4483
+ // src/export/marp.ts
4484
// Maximum length (in UTF-16 code units) of the excerpt shown on a slide.
var SLIDE_BODY_MAX_CHARS = 300;
/**
 * Extract a short plain-text excerpt from the first block of a page body.
 *
 * The first block is everything before the first blank line. Leading heading
 * markers ("#".."######") and list bullets ("-", "*", "+") are removed from
 * each of its lines.
 *
 * @param {string} body - Markdown body of a page.
 * @returns {string} The excerpt. When it exceeds SLIDE_BODY_MAX_CHARS it is
 *   cut and suffixed with an ellipsis, without ever splitting a surrogate pair.
 */
function extractFirstParagraph(body) {
  const [firstBlock = ""] = body.trim().split(/\n\s*\n/);
  const stripped = firstBlock.replace(/^#{1,6}\s+/gm, "").replace(/^[-*+]\s+/gm, "").trim();
  if (stripped.length <= SLIDE_BODY_MAX_CHARS) return stripped;
  let end = SLIDE_BODY_MAX_CHARS;
  const lastUnit = stripped.charCodeAt(end - 1);
  // A high surrogate at the cut point means the cut falls inside an astral
  // character (e.g. an emoji); back off one unit so no lone surrogate is emitted.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  return `${stripped.slice(0, end)}\u2026`;
}
4492
/**
 * Build the HTML comment appended to a slide with the page's provenance.
 *
 * @param {object} page - Export record.
 * @returns {string} `<!-- created: … | updated: … [| sources: …] -->`.
 */
function buildSpeakerNotes(page) {
  const sourceNote = page.sources.length > 0 ? [`sources: ${page.sources.join(", ")}`] : [];
  const details = [`created: ${page.createdAt}`, `updated: ${page.updatedAt}`, ...sourceNote];
  return "<!-- ".concat(details.join(" | "), " -->");
}
4497
/**
 * Render one page as the markdown content of a Marp slide.
 *
 * @param {object} page - Export record.
 * @returns {string} Title heading, quoted summary (with a tags line when the
 *   page has tags), body excerpt and speaker-notes comment, separated by
 *   blank lines.
 */
function pageToSlide(page) {
  let quote = `> ${page.summary}`;
  if (page.tags.length > 0) {
    quote += `\n_Tags: ${page.tags.join(", ")}_`;
  }
  const bodyExcerpt = extractFirstParagraph(page.body);
  const speakerNotes = buildSpeakerNotes(page);
  return `## ${page.title}\n\n${quote}\n\n${bodyExcerpt}\n\n${speakerNotes}`;
}
4512
/**
 * Restrict pages to one page directory.
 *
 * @param {object[]} pages - All export records.
 * @param {string} source2 - "all" to keep everything, otherwise the
 *   `pageDirectory` value to keep.
 * @returns {object[]} The same array for "all"; a filtered copy otherwise.
 */
function filterBySource(pages, source2) {
  if (source2 === "all") return pages;
  return pages.filter((p) => p.pageDirectory === source2);
}
/**
 * Build a Marp slide deck: front matter, a title slide and one slide per page.
 *
 * The title is written as a JSON string literal, which is also a valid YAML
 * double-quoted scalar, so quotes, backslashes and newlines in the project
 * title cannot break the front matter. Titles without such characters render
 * exactly as before.
 *
 * @param {object[]} pages - All export records.
 * @param {string} projectTitle - Deck title.
 * @param {string} [source2="all"] - Page directory to include, or "all".
 * @returns {string} Marp markdown, with slides separated by "---" rules.
 */
function buildMarp(pages, projectTitle, source2 = "all") {
  const filtered = filterBySource(pages, source2);
  const frontmatter = [
    "---",
    "marp: true",
    "theme: default",
    "paginate: true",
    `title: ${JSON.stringify(String(projectTitle))}`,
    "---"
  ].join("\n");
  const titleSlide = [
    "",
    `# ${projectTitle}`,
    "",
    `${filtered.length} pages | ${new Date().toISOString()}`
  ].join("\n");
  const slides = filtered.map((p) => `---\n\n${pageToSlide(p)}`);
  return [frontmatter, titleSlide, ...slides, ""].join("\n\n");
}
4537
+
4538
+ // src/export/types.ts
4539
// Values accepted for the Marp `--source` option: a single page directory, or
// "all" for every page.
var MARP_SOURCES = ["concepts", "queries", "all"];
// Identifiers of every supported export format. When no explicit target is
// requested, all of them are exported.
var EXPORT_TARGETS = [
  "llms-txt",
  "llms-full-txt",
  "json",
  "json-ld",
  "graphml",
  "marp"
];
4548
+
4549
+ // src/commands/export.ts
4550
// CommonJS-style loader, used to read the project's package.json.
var require2 = createRequire(import.meta.url);
// Output directory for export files, relative to the project root.
var EXPORT_DIR = "dist/exports";
// Output file name for each export target.
var TARGET_FILENAMES = {
  "llms-txt": "llms.txt",
  "llms-full-txt": "llms-full.txt",
  json: "wiki.json",
  "json-ld": "wiki.jsonld",
  graphml: "wiki.graphml",
  marp: "wiki.md"
};
/**
 * Determine the title used in export headers.
 *
 * The path is made absolute before loading: `require` resolves a relative
 * specifier against the requiring module (or `node_modules` for a bare
 * name), not against the working directory, so a relative `root` would
 * otherwise miss the project's package.json.
 *
 * @param {string} root - Project root directory (absolute or relative to cwd).
 * @returns {string} The non-blank `name` from `<root>/package.json`, or
 *   "Knowledge Wiki" when the file is missing, unreadable, or has no usable name.
 */
function resolveProjectTitle(root) {
  try {
    const pkg = require2(path30.resolve(root, "package.json"));
    const hasName = typeof pkg.name === "string" && pkg.name.trim() !== "";
    return hasName ? pkg.name : "Knowledge Wiki";
  } catch {
    // A missing or invalid package.json is expected for plain wiki folders.
    return "Knowledge Wiki";
  }
}
4568
/**
 * Check whether a value names a supported export target.
 *
 * @param {*} value - Candidate target.
 * @returns {boolean} True when `value` is one of EXPORT_TARGETS.
 */
function isValidTarget(value) {
  return EXPORT_TARGETS.some((target) => target === value);
}
4571
/**
 * Check whether a value is an accepted Marp source selector.
 *
 * @param {*} value - Candidate source.
 * @returns {boolean} True when `value` is one of MARP_SOURCES.
 */
function isValidMarpSource(value) {
  return MARP_SOURCES.some((candidate) => candidate === value);
}
4574
/**
 * Normalize the raw `--source` option.
 *
 * @param {string|undefined} rawSource - Value supplied by the caller, if any.
 * @returns {string} "all" when nothing was supplied; otherwise the validated value.
 * @throws {Error} When the value is not one of MARP_SOURCES.
 */
function resolveMarpSource(rawSource) {
  if (!rawSource) return "all";
  if (isValidMarpSource(rawSource)) return rawSource;
  const allowed = MARP_SOURCES.join(", ");
  throw new Error(`Unknown --source value "${rawSource}". Valid values: ${allowed}`);
}
4583
/**
 * Render the export document for one target.
 *
 * @param {string} target - One of the supported export targets.
 * @param {object[]} pages - All export records.
 * @param {string} projectTitle - Title used by the text and slide formats.
 * @param {string} marpSource - Page selection for the "marp" target.
 * @returns {string} The rendered document.
 * @throws {Error} When `target` is not a supported export target. Without
 *   this, the function would return `undefined` and the caller would fail
 *   later with an unrelated error.
 */
function buildContent(target, pages, projectTitle, marpSource) {
  switch (target) {
    case "llms-txt":
      return buildLlmsTxt(pages, projectTitle);
    case "llms-full-txt":
      return buildLlmsFullTxt(pages, projectTitle);
    case "json":
      return buildJsonExport(pages);
    case "json-ld":
      return buildJsonLd(pages);
    case "graphml":
      return buildGraphml(pages);
    case "marp":
      return buildMarp(pages, projectTitle, marpSource);
    default:
      throw new Error(`Unsupported export target "${target}"`);
  }
}
4599
/**
 * Compute the page count reported to the user after an export.
 *
 * @param {object[]} pages - All export records.
 * @param {string[]} targets - Targets that were exported.
 * @param {string} marpSource - Page selection used for the "marp" target.
 * @returns {number} The number of pages in `marpSource` when "marp" was the
 *   only target and a specific source was selected; otherwise the total.
 */
function computeReportedPageCount(pages, targets, marpSource) {
  const isFilteredMarpOnly = targets.length === 1 && targets[0] === "marp" && marpSource !== "all";
  if (!isFilteredMarpOnly) return pages.length;
  return pages.reduce(
    (count, page) => (page.pageDirectory === marpSource ? count + 1 : count),
    0
  );
}
4606
/**
 * Export the wiki to the requested formats.
 *
 * Collects the pages, resolves the title and options, then renders and
 * writes one file per target under `<root>/dist/exports`, logging each one.
 *
 * @param {string} root - Project root directory.
 * @param {{target?: string, source?: string}} [options] - Optional export
 *   target and Marp source selector.
 * @returns {Promise<{written: string[], pageCount: number}>} Paths of the
 *   written files and the page count to report.
 * @throws {Error} When `options.target` or `options.source` is not recognized.
 */
async function runExport(root, options = {}) {
  const pages = await collectExportPages(root);
  const projectTitle = resolveProjectTitle(root);
  const targets = resolveTargets(options.target);
  const marpSource = resolveMarpSource(options.source);
  const exportRoot = path30.join(root, EXPORT_DIR);
  const written = [];
  for (const target of targets) {
    const rendered = buildContent(target, pages, projectTitle, marpSource);
    const outPath = path30.join(exportRoot, TARGET_FILENAMES[target]);
    await atomicWrite(outPath, rendered);
    written.push(outPath);
    status("+", success(`Exported ${target} \u2192 ${source(outPath)}`));
  }
  const pageCount = computeReportedPageCount(pages, targets, marpSource);
  return { written, pageCount };
}
4621
/**
 * Normalize the raw export target option into a list of targets.
 *
 * @param {string|undefined} rawTarget - Value supplied by the caller, if any.
 * @returns {string[]} A fresh copy of every supported target when nothing was
 *   supplied; otherwise a single-element list with the validated target.
 * @throws {Error} When the value is not one of EXPORT_TARGETS.
 */
function resolveTargets(rawTarget) {
  if (!rawTarget) return EXPORT_TARGETS.slice();
  if (isValidTarget(rawTarget)) return [rawTarget];
  const allowed = EXPORT_TARGETS.join(", ");
  throw new Error(`Unknown export target "${rawTarget}". Valid targets: ${allowed}`);
}
4630
/**
 * CLI entry point for the export command: prints a header, runs the export
 * and reports how many pages went into how many files.
 *
 * @param {string} root - Project root directory.
 * @param {{target?: string, source?: string}} options - Export options passed to `runExport`.
 * @returns {Promise<void>}
 */
async function exportCommand(root, options) {
  header("Exporting wiki");
  const result = await runExport(root, options);
  const summary = `Done \u2014 ${result.pageCount} pages exported to ${result.written.length} file(s).`;
  status("\u2713", success(summary));
}
4638
+
3773
4639
  // src/commands/schema.ts
3774
4640
  import { existsSync as existsSync10 } from "fs";
3775
4641
  import { mkdir as mkdir6, writeFile as writeFile5 } from "fs/promises";
3776
- import path24 from "path";
4642
+ import path31 from "path";
3777
4643
  async function schemaInitCommand() {
3778
4644
  const root = process.cwd();
3779
4645
  const defaults = buildDefaultSchema();
@@ -3782,7 +4648,7 @@ async function schemaInitCommand() {
3782
4648
  status("!", warn(`Schema file already exists at ${targetPath}`));
3783
4649
  return;
3784
4650
  }
3785
- await mkdir6(path24.dirname(targetPath), { recursive: true });
4651
+ await mkdir6(path31.dirname(targetPath), { recursive: true });
3786
4652
  const serializable = {
3787
4653
  version: defaults.version,
3788
4654
  defaultKind: defaults.defaultKind,
@@ -3838,10 +4704,17 @@ async function reviewShowCommand(id) {
3838
4704
  status("!", warn(`[${v.severity}] ${v.message}`));
3839
4705
  }
3840
4706
  }
4707
+ if (candidate.provenanceViolations && candidate.provenanceViolations.length > 0) {
4708
+ console.log();
4709
+ header("Provenance violations");
4710
+ for (const v of candidate.provenanceViolations) {
4711
+ status("!", warn(`[${v.severity}] ${v.message}`));
4712
+ }
4713
+ }
3841
4714
  }
3842
4715
 
3843
4716
  // src/commands/review-approve.ts
3844
- import path25 from "path";
4717
+ import path32 from "path";
3845
4718
 
3846
4719
  // src/commands/review-helpers.ts
3847
4720
  async function runReviewUnderLock(id, underLock) {
@@ -3873,7 +4746,7 @@ async function approveUnderLock(root, id) {
3873
4746
  process.exitCode = 1;
3874
4747
  return;
3875
4748
  }
3876
- const pagePath = path25.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
4749
+ const pagePath = path32.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
3877
4750
  await atomicWrite(pagePath, candidate.body);
3878
4751
  status("+", success(`Approved \u2192 ${source(pagePath)}`));
3879
4752
  await persistCandidateSourceStates(root, candidate);
@@ -3933,7 +4806,7 @@ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js
3933
4806
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3934
4807
 
3935
4808
  // src/mcp/tools.ts
3936
- import path26 from "path";
4809
+ import path33 from "path";
3937
4810
  import { z } from "zod";
3938
4811
 
3939
4812
  // src/mcp/provider-check.ts
@@ -4069,7 +4942,7 @@ async function pickSearchSlugs(root, question) {
4069
4942
  if (candidates.length > 0) return candidates.map((c) => c.slug);
4070
4943
  } catch {
4071
4944
  }
4072
- const indexContent = await safeReadFile(path26.join(root, INDEX_FILE));
4945
+ const indexContent = await safeReadFile(path33.join(root, INDEX_FILE));
4073
4946
  const { pages } = await selectPages(question, indexContent);
4074
4947
  return pages;
4075
4948
  }
@@ -4128,8 +5001,8 @@ function registerStatusTool(server, root) {
4128
5001
  );
4129
5002
  }
4130
5003
  async function collectStatus(root) {
4131
- const concepts = await collectPageSummaries(path26.join(root, CONCEPTS_DIR));
4132
- const queries = await collectPageSummaries(path26.join(root, QUERIES_DIR));
5004
+ const concepts = await collectPageSummaries(path33.join(root, CONCEPTS_DIR));
5005
+ const queries = await collectPageSummaries(path33.join(root, QUERIES_DIR));
4133
5006
  const state = await readState(root);
4134
5007
  const changes = await detectChanges(root, state);
4135
5008
  const orphans = await findOrphanedSlugs(root);
@@ -4146,7 +5019,7 @@ async function collectStatus(root) {
4146
5019
  };
4147
5020
  }
4148
5021
  async function findOrphanedSlugs(root) {
4149
- const scanned = await scanWikiPages(path26.join(root, CONCEPTS_DIR));
5022
+ const scanned = await scanWikiPages(path33.join(root, CONCEPTS_DIR));
4150
5023
  return scanned.filter(({ meta }) => meta.orphaned).map(({ slug }) => slug);
4151
5024
  }
4152
5025
  async function loadPageRecords(root, slugs) {
@@ -4159,7 +5032,7 @@ async function loadPageRecords(root, slugs) {
4159
5032
  }
4160
5033
  async function readPage(root, slug) {
4161
5034
  for (const dir of PAGE_DIRS2) {
4162
- const content = await safeReadFile(path26.join(root, dir, `${slug}.md`));
5035
+ const content = await safeReadFile(path33.join(root, dir, `${slug}.md`));
4163
5036
  if (!content) continue;
4164
5037
  const { meta, body } = parseFrontmatter(content);
4165
5038
  if (meta.orphaned) continue;
@@ -4174,8 +5047,8 @@ async function readPage(root, slug) {
4174
5047
  }
4175
5048
 
4176
5049
  // src/mcp/resources.ts
4177
- import path27 from "path";
4178
- import { readdir as readdir9 } from "fs/promises";
5050
+ import path34 from "path";
5051
+ import { readdir as readdir11 } from "fs/promises";
4179
5052
  import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
4180
5053
  function jsonContent(uri, payload) {
4181
5054
  return {
@@ -4208,7 +5081,7 @@ function registerIndexResource(server, root) {
4208
5081
  mimeType: "text/markdown"
4209
5082
  },
4210
5083
  async (uri) => {
4211
- const content = await safeReadFile(path27.join(root, INDEX_FILE));
5084
+ const content = await safeReadFile(path34.join(root, INDEX_FILE));
4212
5085
  return { contents: [markdownContent(uri, content)] };
4213
5086
  }
4214
5087
  );
@@ -4275,23 +5148,23 @@ function registerQueryResource(server, root) {
4275
5148
  );
4276
5149
  }
4277
5150
  async function listSources(root) {
4278
- const sourcesPath = path27.join(root, SOURCES_DIR);
5151
+ const sourcesPath = path34.join(root, SOURCES_DIR);
4279
5152
  let files;
4280
5153
  try {
4281
- files = await readdir9(sourcesPath);
5154
+ files = await readdir11(sourcesPath);
4282
5155
  } catch {
4283
5156
  return [];
4284
5157
  }
4285
5158
  const records = [];
4286
5159
  for (const file of files.filter((f) => f.endsWith(".md"))) {
4287
- const content = await safeReadFile(path27.join(sourcesPath, file));
5160
+ const content = await safeReadFile(path34.join(sourcesPath, file));
4288
5161
  const { meta } = parseFrontmatter(content);
4289
5162
  records.push({ filename: file, ...meta });
4290
5163
  }
4291
5164
  return records;
4292
5165
  }
4293
5166
  async function loadPageWithMeta(root, dir, slug) {
4294
- const filePath = path27.join(root, dir, `${slug}.md`);
5167
+ const filePath = path34.join(root, dir, `${slug}.md`);
4295
5168
  const content = await safeReadFile(filePath);
4296
5169
  if (!content) {
4297
5170
  throw new Error(`Page not found: ${dir}/${slug}.md`);
@@ -4300,10 +5173,10 @@ async function loadPageWithMeta(root, dir, slug) {
4300
5173
  return { slug, meta, body: body.trim() };
4301
5174
  }
4302
5175
  async function listPagesUnder(root, dir, scheme) {
4303
- const pagesPath = path27.join(root, dir);
5176
+ const pagesPath = path34.join(root, dir);
4304
5177
  let files;
4305
5178
  try {
4306
- files = await readdir9(pagesPath);
5179
+ files = await readdir11(pagesPath);
4307
5180
  } catch {
4308
5181
  return { resources: [] };
4309
5182
  }
@@ -4327,8 +5200,8 @@ async function startMCPServer(options) {
4327
5200
  }
4328
5201
 
4329
5202
  // src/cli.ts
4330
- var require2 = createRequire(import.meta.url);
4331
- var { version } = require2("../package.json");
5203
+ var require3 = createRequire2(import.meta.url);
5204
+ var { version } = require3("../package.json");
4332
5205
  var program = new Command();
4333
5206
  program.name("llmwiki").description("The knowledge compiler \u2014 raw sources in, interlinked wiki out").version(version);
4334
5207
  program.command("ingest <source>").description("Ingest a URL or local file into sources/").action(async (source2) => {
@@ -4339,11 +5212,23 @@ program.command("ingest <source>").description("Ingest a URL or local file into
4339
5212
  process.exit(1);
4340
5213
  }
4341
5214
  });
5215
+ program.command("ingest-session <path>").description("Ingest a coding-agent session export (Claude, Codex, Cursor) into sources/").action(async (targetPath) => {
5216
+ try {
5217
+ await ingestSession(targetPath);
5218
+ } catch (err) {
5219
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
5220
+ process.exit(1);
5221
+ }
5222
+ });
4342
5223
  program.command("compile").description("Compile sources/ into an interlinked wiki").option(
4343
5224
  "--review",
4344
5225
  "Write generated pages as review candidates under .llmwiki/candidates/ instead of mutating wiki/. Orphan-marking for deleted sources is deferred until the next non-review compile."
5226
+ ).option(
5227
+ "--lang <code>",
5228
+ 'Target language for generated wiki content (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
4345
5229
  ).action(async (options) => {
4346
5230
  try {
5231
+ applyLanguageOption(options.lang);
4347
5232
  requireProvider();
4348
5233
  await compileCommand({ review: options.review });
4349
5234
  } catch (err) {
@@ -4384,15 +5269,21 @@ reviewCommand.command("reject <id>").description("Reject a candidate and archive
4384
5269
  process.exit(1);
4385
5270
  }
4386
5271
  });
4387
- program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").action(async (question, options) => {
4388
- try {
4389
- requireProvider();
4390
- await queryCommand(process.cwd(), question, options);
4391
- } catch (err) {
4392
- console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
4393
- process.exit(1);
5272
+ program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").option(
5273
+ "--lang <code>",
5274
+ 'Target language for the answer (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
5275
+ ).action(
5276
+ async (question, options) => {
5277
+ try {
5278
+ applyLanguageOption(options.lang);
5279
+ requireProvider();
5280
+ await queryCommand(process.cwd(), question, options);
5281
+ } catch (err) {
5282
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
5283
+ process.exit(1);
5284
+ }
4394
5285
  }
4395
- });
5286
+ );
4396
5287
  program.command("watch").description("Watch sources/ and auto-recompile on changes").action(async () => {
4397
5288
  try {
4398
5289
  requireProvider();
@@ -4427,6 +5318,17 @@ schemaCmd.command("show").description("Print the resolved schema for this projec
4427
5318
  process.exit(1);
4428
5319
  }
4429
5320
  });
5321
+ program.command("export").description("Export wiki content to portable formats (llms.txt, JSON, GraphML, Marp, \u2026)").option("--target <name>", "Limit export to a single target format").option(
5322
+ "--source <kind>",
5323
+ "For marp target: which pages to include \u2014 concepts, queries, or all (default: all)"
5324
+ ).action(async (options) => {
5325
+ try {
5326
+ await exportCommand(process.cwd(), options);
5327
+ } catch (err) {
5328
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
5329
+ process.exit(1);
5330
+ }
5331
+ });
4430
5332
  program.command("serve").description("Start an MCP server exposing wiki tools and resources over stdio").option("--root <dir>", "Project root directory", process.cwd()).action(async (options) => {
4431
5333
  try {
4432
5334
  await startMCPServer({ root: options.root, version });
@@ -4435,6 +5337,11 @@ program.command("serve").description("Start an MCP server exposing wiki tools an
4435
5337
  process.exit(1);
4436
5338
  }
4437
5339
  });
5340
+ function applyLanguageOption(lang) {
5341
+ if (lang && lang.trim().length > 0) {
5342
+ process.env.LLMWIKI_OUTPUT_LANG = lang.trim();
5343
+ }
5344
+ }
4438
5345
  var PROVIDER_KEY_VARS2 = {
4439
5346
  anthropic: "ANTHROPIC_API_KEY",
4440
5347
  openai: "OPENAI_API_KEY",