llm-wiki-compiler 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -2,12 +2,12 @@
2
2
 
3
3
  // src/cli.ts
4
4
  import "dotenv/config";
5
- import { createRequire } from "module";
5
+ import { createRequire as createRequire2 } from "module";
6
6
  import { Command } from "commander";
7
7
 
8
8
  // src/commands/ingest.ts
9
- import path7 from "path";
10
- import { mkdir as mkdir2, readFile as readFile6, writeFile as writeFile2 } from "fs/promises";
9
+ import path8 from "path";
10
+ import { readFile as readFile7 } from "fs/promises";
11
11
 
12
12
  // src/utils/markdown.ts
13
13
  import { writeFile, rename, readFile, mkdir } from "fs/promises";
@@ -22,7 +22,7 @@ var VALID_PROVENANCE_STATES = /* @__PURE__ */ new Set([
22
22
  "ambiguous"
23
23
  ]);
24
24
  function slugify(title) {
25
- return title.toLowerCase().replace(/['']/g, "").replace(/[^\w\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
25
+ return title.toLowerCase().replace(/['']/g, "").replace(/[^\p{L}\p{N}\s-]/gu, "").replace(/\s+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
26
26
  }
27
27
  function buildFrontmatter(fields) {
28
28
  const dumped = yaml.dump(fields, { lineWidth: -1, quotingType: '"' }).trimEnd();
@@ -103,16 +103,11 @@ function parseContradictedBy(raw) {
103
103
  const refs = raw.map(coerceContradictionEntry).filter((ref) => ref !== null);
104
104
  return refs.length > 0 ? refs : void 0;
105
105
  }
106
- function parseInferredParagraphs(raw) {
107
- if (typeof raw !== "number" || !Number.isInteger(raw) || raw < 0) return void 0;
108
- return raw;
109
- }
110
106
  function parseProvenanceMetadata(meta) {
111
107
  return {
112
108
  confidence: parseConfidence(meta.confidence),
113
109
  provenanceState: parseProvenanceState(meta.provenanceState),
114
- contradictedBy: parseContradictedBy(meta.contradictedBy),
115
- inferredParagraphs: parseInferredParagraphs(meta.inferredParagraphs)
110
+ contradictedBy: parseContradictedBy(meta.contradictedBy)
116
111
  };
117
112
  }
118
113
  function validateWikiPage(content) {
@@ -123,9 +118,16 @@ function validateWikiPage(content) {
123
118
  return true;
124
119
  }
125
120
 
121
+ // src/utils/source-writer.ts
122
+ import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
123
+ import path2 from "path";
124
+ import { createHash } from "crypto";
125
+
126
126
  // src/utils/constants.ts
127
127
  var MAX_SOURCE_CHARS = 1e5;
128
128
  var MIN_SOURCE_CHARS = 50;
129
+ var DEFAULT_PROMPT_BUDGET_CHARS = 2e5;
130
+ var PROMPT_BUDGET_ENV_VAR = "LLMWIKI_PROMPT_BUDGET_CHARS";
129
131
  var QUERY_PAGE_LIMIT = 5;
130
132
  var COMPILE_CONCURRENCY = 5;
131
133
  var RETRY_COUNT = 3;
@@ -169,6 +171,42 @@ var EMBEDDING_MODELS = {
169
171
  ollama: "nomic-embed-text"
170
172
  };
171
173
 
174
+ // src/utils/source-writer.ts
175
+ var COLLISION_HASH_LEN = 8;
176
+ function shortHashOfSource(source2) {
177
+ return createHash("sha256").update(source2).digest("hex").slice(0, COLLISION_HASH_LEN);
178
+ }
179
+ async function resolveCollisionFreeFilename(slug, source2) {
180
+ const candidate = `${slug}.md`;
181
+ const candidatePath2 = path2.join(SOURCES_DIR, candidate);
182
+ let existing;
183
+ try {
184
+ existing = await readFile2(candidatePath2, "utf-8");
185
+ } catch (err) {
186
+ const e = err;
187
+ if (e.code === "ENOENT") return candidate;
188
+ throw err;
189
+ }
190
+ const { meta } = parseFrontmatter(existing);
191
+ if (typeof meta.source === "string" && meta.source === source2) {
192
+ return candidate;
193
+ }
194
+ return `${slug}-${shortHashOfSource(source2)}.md`;
195
+ }
196
+ async function saveSource(title, document, source2) {
197
+ const slug = slugify(title);
198
+ if (!slug) {
199
+ throw new Error(
200
+ `Could not derive a filename from title "${title}". The title contains no letter or number characters. Rename the source file to one with at least one letter or digit.`
201
+ );
202
+ }
203
+ await mkdir2(SOURCES_DIR, { recursive: true });
204
+ const filename = await resolveCollisionFreeFilename(slug, source2);
205
+ const destPath = path2.join(SOURCES_DIR, filename);
206
+ await writeFile2(destPath, document, "utf-8");
207
+ return destPath;
208
+ }
209
+
172
210
  // src/utils/output.ts
173
211
  var RESET = "\x1B[0m";
174
212
  var BOLD = "\x1B[1m";
@@ -244,13 +282,13 @@ async function ingestWeb(url) {
244
282
  }
245
283
 
246
284
  // src/ingest/file.ts
247
- import { readFile as readFile2 } from "fs/promises";
248
- import path3 from "path";
285
+ import { readFile as readFile3 } from "fs/promises";
286
+ import path4 from "path";
249
287
 
250
288
  // src/ingest/shared.ts
251
- import path2 from "path";
289
+ import path3 from "path";
252
290
  function titleFromFilename(filePath) {
253
- const basename = path2.basename(filePath, path2.extname(filePath));
291
+ const basename = path3.basename(filePath, path3.extname(filePath));
254
292
  return basename.replace(/[-_]+/g, " ").trim();
255
293
  }
256
294
 
@@ -262,20 +300,20 @@ ${text}
262
300
  \`\`\``;
263
301
  }
264
302
  async function ingestFile(filePath) {
265
- const ext = path3.extname(filePath).toLowerCase();
303
+ const ext = path4.extname(filePath).toLowerCase();
266
304
  if (!SUPPORTED_EXTENSIONS.has(ext)) {
267
305
  throw new Error(
268
306
  `Unsupported file type "${ext}". Only .md and .txt files are supported.`
269
307
  );
270
308
  }
271
- const raw = await readFile2(filePath, "utf-8");
309
+ const raw = await readFile3(filePath, "utf-8");
272
310
  const title = titleFromFilename(filePath);
273
311
  const content = ext === ".md" ? raw : wrapPlainText(raw);
274
312
  return { title, content };
275
313
  }
276
314
 
277
315
  // src/ingest/pdf.ts
278
- import { readFile as readFile3 } from "fs/promises";
316
+ import { readFile as readFile4 } from "fs/promises";
279
317
  function resolveTitle(filePath, info2) {
280
318
  if (info2 && typeof info2 === "object") {
281
319
  const titleField = info2["Title"];
@@ -287,7 +325,7 @@ function resolveTitle(filePath, info2) {
287
325
  }
288
326
  async function ingestPdf(filePath) {
289
327
  const { PDFParse } = await import("pdf-parse");
290
- const buffer = await readFile3(filePath);
328
+ const buffer = await readFile4(filePath);
291
329
  const parser = new PDFParse({ data: new Uint8Array(buffer) });
292
330
  try {
293
331
  const textResult = await parser.getText();
@@ -301,8 +339,8 @@ async function ingestPdf(filePath) {
301
339
  }
302
340
 
303
341
  // src/ingest/image.ts
304
- import { readFile as readFile4 } from "fs/promises";
305
- import path5 from "path";
342
+ import { readFile as readFile5 } from "fs/promises";
343
+ import path6 from "path";
306
344
  import Anthropic2 from "@anthropic-ai/sdk";
307
345
 
308
346
  // src/providers/anthropic.ts
@@ -419,7 +457,7 @@ var AnthropicProvider = class {
419
457
  // src/utils/claude-settings.ts
420
458
  import { readFileSync } from "fs";
421
459
  import { homedir } from "os";
422
- import path4 from "path";
460
+ import path5 from "path";
423
461
  var CLAUDE_SETTINGS_PATH_ENV = "LLMWIKI_CLAUDE_SETTINGS_PATH";
424
462
  function isRecord(value) {
425
463
  return typeof value === "object" && value !== null;
@@ -430,7 +468,7 @@ function normalize(value) {
430
468
  return trimmed.length > 0 ? trimmed : void 0;
431
469
  }
432
470
  function resolveClaudeSettingsPath(env) {
433
- return env[CLAUDE_SETTINGS_PATH_ENV] ?? path4.join(homedir(), ".claude", "settings.json");
471
+ return env[CLAUDE_SETTINGS_PATH_ENV] ?? path5.join(homedir(), ".claude", "settings.json");
434
472
  }
435
473
  function readClaudeSettingsFile(settingsPath) {
436
474
  try {
@@ -563,9 +601,9 @@ async function ingestImage(filePath) {
563
601
  `Image ingest requires the Anthropic provider (vision). Current provider: "${providerName}". Set LLMWIKI_PROVIDER=anthropic and ANTHROPIC_API_KEY to use image ingest.`
564
602
  );
565
603
  }
566
- const ext = path5.extname(filePath).toLowerCase();
604
+ const ext = path6.extname(filePath).toLowerCase();
567
605
  const mimeType = mimeTypeForExtension(ext);
568
- const imageBuffer = await readFile4(filePath);
606
+ const imageBuffer = await readFile5(filePath);
569
607
  const imageData = imageBuffer.toString("base64");
570
608
  const client = buildClient();
571
609
  const model = resolveAnthropicModelFromEnv() ?? PROVIDER_MODELS.anthropic;
@@ -575,10 +613,9 @@ async function ingestImage(filePath) {
575
613
  }
576
614
 
577
615
  // src/ingest/transcript.ts
578
- import { readFile as readFile5 } from "fs/promises";
579
- import path6 from "path";
580
- import { YoutubeTranscript as YoutubeTranscriptUntyped } from "youtube-transcript/dist/youtube-transcript.esm.js";
581
- var YoutubeTranscript = YoutubeTranscriptUntyped;
616
+ import { readFile as readFile6 } from "fs/promises";
617
+ import path7 from "path";
618
+ import { YoutubeTranscript } from "youtube-transcript";
582
619
  var YOUTUBE_URL_PATTERN = /^https?:\/\/(www\.)?(youtube\.com\/watch|youtu\.be\/)/;
583
620
  var SRT_SEQUENCE_PATTERN = /^\d+$/;
584
621
  var TIMESTAMP_PATTERN = /\d{2}:\d{2}[:.]\d{2}/;
@@ -662,8 +699,8 @@ async function ingestTranscript(source2) {
662
699
  if (isYoutubeUrl(source2)) {
663
700
  return fetchYoutubeTranscript(source2);
664
701
  }
665
- const ext = path6.extname(source2).toLowerCase();
666
- const raw = await readFile5(source2, "utf-8");
702
+ const ext = path7.extname(source2).toLowerCase();
703
+ const raw = await readFile6(source2, "utf-8");
667
704
  if (ext === ".vtt") return parseVtt(raw, source2);
668
705
  if (ext === ".srt") return parseSrt(raw, source2);
669
706
  if (ext === ".txt") return parsePlainTranscript(raw, source2);
@@ -702,7 +739,7 @@ function hasSpeakerDialoguePattern(sample) {
702
739
  return hasEnoughSpeakers && hasRepeatedSpeaker;
703
740
  }
704
741
  async function looksLikeTxtTranscript(filePath) {
705
- const raw = await readFile6(filePath, "utf-8");
742
+ const raw = await readFile7(filePath, "utf-8");
706
743
  const sample = raw.slice(0, TXT_SNIFF_BYTES);
707
744
  if (hasSpeakerDialoguePattern(sample)) return true;
708
745
  const timestampMatches = sample.match(new RegExp(TIMESTAMP_PATTERN2.source, "gm"));
@@ -742,7 +779,7 @@ function enforceMinContent(content) {
742
779
  }
743
780
  async function detectSourceType(source2) {
744
781
  if (!isUrl(source2)) {
745
- const ext = path7.extname(source2).toLowerCase();
782
+ const ext = path8.extname(source2).toLowerCase();
746
783
  if (ext === ".pdf") return "pdf";
747
784
  if (IMAGE_EXTENSIONS.has(ext)) return "image";
748
785
  if (TRANSCRIPT_EXTENSIONS.has(ext)) return "transcript";
@@ -788,13 +825,6 @@ async function fetchContent(source2, sourceType) {
788
825
  return ingestFile(source2);
789
826
  }
790
827
  }
791
- async function saveSource(title, document) {
792
- const filename = `${slugify(title)}.md`;
793
- const destPath = path7.join(SOURCES_DIR, filename);
794
- await mkdir2(SOURCES_DIR, { recursive: true });
795
- await writeFile2(destPath, document, "utf-8");
796
- return destPath;
797
- }
798
828
  async function ingestSource(source2) {
799
829
  const sourceType = await detectSourceType(source2);
800
830
  status("*", info(`Ingesting [${sourceType}]: ${source2}`));
@@ -802,9 +832,9 @@ async function ingestSource(source2) {
802
832
  const result = enforceCharLimit(content);
803
833
  enforceMinContent(result.content);
804
834
  const document = buildDocument(title, source2, result, sourceType);
805
- const savedPath = await saveSource(title, document);
835
+ const savedPath = await saveSource(title, document, source2);
806
836
  return {
807
- filename: path7.basename(savedPath),
837
+ filename: path8.basename(savedPath),
808
838
  charCount: result.content.length,
809
839
  truncated: result.truncated,
810
840
  source: source2,
@@ -813,7 +843,7 @@ async function ingestSource(source2) {
813
843
  }
814
844
  async function ingest(source2) {
815
845
  const result = await ingestSource(source2);
816
- const savedPath = path7.join(SOURCES_DIR, result.filename);
846
+ const savedPath = path8.join(SOURCES_DIR, result.filename);
817
847
  status(
818
848
  "+",
819
849
  success(`Saved ${bold(result.filename)} \u2192 ${source(savedPath)}`)
@@ -821,27 +851,390 @@ async function ingest(source2) {
821
851
  status("\u2192", dim("Next: llmwiki compile"));
822
852
  }
823
853
 
854
+ // src/commands/ingest-session.ts
855
+ import path12 from "path";
856
+ import { readdir, stat } from "fs/promises";
857
+
858
+ // src/adapters/claude.ts
859
+ import { readFile as readFile8 } from "fs/promises";
860
+ import path9 from "path";
861
+
862
+ // src/adapters/utils.ts
863
+ var MAX_TITLE_CHARS = 80;
864
+ function truncateTitle(text) {
865
+ const trimmed = text.trim();
866
+ return trimmed.length > MAX_TITLE_CHARS ? trimmed.slice(0, MAX_TITLE_CHARS).trimEnd() + "\u2026" : trimmed;
867
+ }
868
+ function resolveSessionTitle(rawTitle, firstUserContent, defaultTitle) {
869
+ if (rawTitle && rawTitle.trim().length > 0) return truncateTitle(rawTitle);
870
+ if (firstUserContent) {
871
+ const firstLine = firstUserContent.split("\n")[0];
872
+ if (firstLine.trim().length > 0) return truncateTitle(firstLine);
873
+ }
874
+ return defaultTitle;
875
+ }
876
+ function parseJsonOrThrow(raw, filePath) {
877
+ try {
878
+ return JSON.parse(raw);
879
+ } catch {
880
+ throw new Error(`Invalid JSON in session file: ${filePath}`);
881
+ }
882
+ }
883
+
884
+ // src/adapters/claude.ts
885
+ var CLAUDE_EXTENSION = ".jsonl";
886
+ var CLAUDE_TYPE_MARKERS = /* @__PURE__ */ new Set(["user", "assistant", "system", "tool_use", "tool_result"]);
887
+ function extractText(content) {
888
+ if (typeof content === "string") return content;
889
+ return content.filter((b) => b.type === "text" && typeof b.text === "string").map((b) => b.text).join("\n");
890
+ }
891
+ function titleFromFirstUserMessage(turns) {
892
+ const firstUser = turns.find((t) => t.role === "user" && t.content.trim().length > 0);
893
+ return resolveSessionTitle(void 0, firstUser?.content, "Claude Session");
894
+ }
895
+ function parseLine(line) {
896
+ try {
897
+ return JSON.parse(line);
898
+ } catch {
899
+ return null;
900
+ }
901
+ }
902
+ function eventToTurn(event) {
903
+ if (!event.message || !event.message.role) return null;
904
+ const role = event.message.role;
905
+ if (role !== "user" && role !== "assistant") return null;
906
+ const content = extractText(event.message.content);
907
+ if (content.trim().length === 0) return null;
908
+ return { role, content, timestamp: event.timestamp };
909
+ }
910
+ var claudeAdapter = {
911
+ name: "claude",
912
+ async detect(filePath) {
913
+ if (path9.extname(filePath).toLowerCase() !== CLAUDE_EXTENSION) return false;
914
+ const raw = await readFile8(filePath, "utf-8").catch(() => "");
915
+ const firstLine = raw.split("\n")[0].trim();
916
+ if (!firstLine.startsWith("{")) return false;
917
+ try {
918
+ const obj = JSON.parse(firstLine);
919
+ return typeof obj.type === "string" && CLAUDE_TYPE_MARKERS.has(obj.type);
920
+ } catch {
921
+ return false;
922
+ }
923
+ },
924
+ async parse(filePath) {
925
+ const raw = await readFile8(filePath, "utf-8");
926
+ const lines = raw.split("\n").filter((l) => l.trim().length > 0);
927
+ if (lines.length === 0) {
928
+ throw new Error(`Claude session file is empty: ${filePath}`);
929
+ }
930
+ const turns = [];
931
+ const timestamps = [];
932
+ for (const [index, line] of lines.entries()) {
933
+ const event = parseLine(line);
934
+ if (event === null) {
935
+ throw new Error(
936
+ `Malformed JSON on line ${index + 1} of Claude session: ${filePath}`
937
+ );
938
+ }
939
+ if (event.timestamp) timestamps.push(event.timestamp);
940
+ const turn = eventToTurn(event);
941
+ if (turn) turns.push(turn);
942
+ }
943
+ const title = titleFromFirstUserMessage(turns);
944
+ return {
945
+ title,
946
+ adapter: "claude",
947
+ startedAt: timestamps[0],
948
+ endedAt: timestamps[timestamps.length - 1],
949
+ participantIdentity: "Claude Code",
950
+ turns
951
+ };
952
+ }
953
+ };
954
+
955
+ // src/adapters/codex.ts
956
+ import { readFile as readFile9 } from "fs/promises";
957
+ import path10 from "path";
958
+ var CODEX_EXTENSION = ".json";
959
+ function unixToIso(ts) {
960
+ return new Date(ts * 1e3).toISOString();
961
+ }
962
+ function extractTurns(mapping) {
963
+ const turns = [];
964
+ for (const node of Object.values(mapping)) {
965
+ const msg = node.message;
966
+ if (!msg) continue;
967
+ const role = msg.author?.role;
968
+ if (role !== "user" && role !== "assistant") continue;
969
+ const content = (msg.content?.parts ?? []).join("\n").trim();
970
+ if (content.length === 0) continue;
971
+ turns.push({
972
+ role,
973
+ content,
974
+ timestamp: msg.create_time != null ? unixToIso(msg.create_time) : void 0
975
+ });
976
+ }
977
+ turns.sort((a, b) => {
978
+ if (!a.timestamp || !b.timestamp) return 0;
979
+ return a.timestamp.localeCompare(b.timestamp);
980
+ });
981
+ return turns;
982
+ }
983
+ function isCodexExport(value) {
984
+ return Array.isArray(value) && value.length > 0 && typeof value[0].mapping === "object";
985
+ }
986
+ var codexAdapter = {
987
+ name: "codex",
988
+ async detect(filePath) {
989
+ if (path10.extname(filePath).toLowerCase() !== CODEX_EXTENSION) return false;
990
+ const raw = await readFile9(filePath, "utf-8").catch(() => "");
991
+ if (raw.trimStart()[0] !== "[") return false;
992
+ try {
993
+ return isCodexExport(JSON.parse(raw));
994
+ } catch {
995
+ return false;
996
+ }
997
+ },
998
+ async parse(filePath) {
999
+ const raw = await readFile9(filePath, "utf-8");
1000
+ const parsed = parseJsonOrThrow(raw, filePath);
1001
+ if (!isCodexExport(parsed)) {
1002
+ throw new Error(
1003
+ `Codex session file does not contain a conversation array: ${filePath}`
1004
+ );
1005
+ }
1006
+ const conv = parsed[0];
1007
+ const turns = extractTurns(conv.mapping ?? {});
1008
+ const firstUser = turns.find((t) => t.role === "user");
1009
+ return {
1010
+ title: resolveSessionTitle(conv.title, firstUser?.content, "Codex Session"),
1011
+ adapter: "codex",
1012
+ startedAt: conv.create_time != null ? unixToIso(conv.create_time) : void 0,
1013
+ endedAt: conv.update_time != null ? unixToIso(conv.update_time) : void 0,
1014
+ participantIdentity: "OpenAI Codex",
1015
+ turns
1016
+ };
1017
+ }
1018
+ };
1019
+
1020
+ // src/adapters/cursor.ts
1021
+ import { readFile as readFile10 } from "fs/promises";
1022
+ import path11 from "path";
1023
+ var CURSOR_EXTENSION = ".json";
1024
+ function isTabsExport(value) {
1025
+ return typeof value === "object" && value !== null && "tabs" in value && Array.isArray(value.tabs);
1026
+ }
1027
+ function isFlatExport(value) {
1028
+ return typeof value === "object" && value !== null && "messages" in value && Array.isArray(value.messages);
1029
+ }
1030
+ function extractMessagesAndTitle(data) {
1031
+ if (isTabsExport(data)) {
1032
+ const tab = data.tabs[0];
1033
+ return { messages: tab?.messages ?? [], title: tab?.title };
1034
+ }
1035
+ return { messages: data.messages, title: data.title };
1036
+ }
1037
+ function toTurns(messages) {
1038
+ const turns = [];
1039
+ for (const msg of messages) {
1040
+ const role = msg.role;
1041
+ if (role !== "user" && role !== "assistant") continue;
1042
+ const content = (msg.content ?? "").trim();
1043
+ if (content.length === 0) continue;
1044
+ turns.push({ role, content, timestamp: msg.timestamp });
1045
+ }
1046
+ return turns;
1047
+ }
1048
+ var cursorAdapter = {
1049
+ name: "cursor",
1050
+ async detect(filePath) {
1051
+ if (path11.extname(filePath).toLowerCase() !== CURSOR_EXTENSION) return false;
1052
+ const raw = await readFile10(filePath, "utf-8").catch(() => "");
1053
+ if (raw.trimStart()[0] !== "{") return false;
1054
+ try {
1055
+ const parsed = JSON.parse(raw);
1056
+ return isTabsExport(parsed) || isFlatExport(parsed);
1057
+ } catch {
1058
+ return false;
1059
+ }
1060
+ },
1061
+ async parse(filePath) {
1062
+ const raw = await readFile10(filePath, "utf-8");
1063
+ const parsed = parseJsonOrThrow(raw, filePath);
1064
+ if (!isTabsExport(parsed) && !isFlatExport(parsed)) {
1065
+ throw new Error(
1066
+ `Cursor session file does not match a known Cursor export schema: ${filePath}`
1067
+ );
1068
+ }
1069
+ const { messages, title: rawTitle } = extractMessagesAndTitle(parsed);
1070
+ const turns = toTurns(messages);
1071
+ const firstUser = turns.find((t) => t.role === "user");
1072
+ const timestamps = turns.filter((t) => t.timestamp != null).map((t) => t.timestamp);
1073
+ return {
1074
+ title: resolveSessionTitle(rawTitle, firstUser?.content, "Cursor Session"),
1075
+ adapter: "cursor",
1076
+ startedAt: timestamps[0],
1077
+ endedAt: timestamps[timestamps.length - 1],
1078
+ participantIdentity: "Cursor AI",
1079
+ turns
1080
+ };
1081
+ }
1082
+ };
1083
+
1084
+ // src/adapters/registry.ts
1085
+ var ADAPTERS = [claudeAdapter, codexAdapter, cursorAdapter];
1086
+ async function detectAdapter(filePath) {
1087
+ for (const adapter of ADAPTERS) {
1088
+ if (await adapter.detect(filePath)) return adapter;
1089
+ }
1090
+ return null;
1091
+ }
1092
+ async function parseSessionFile(filePath) {
1093
+ const adapter = await detectAdapter(filePath);
1094
+ if (!adapter) {
1095
+ throw new Error(
1096
+ `No session adapter recognised the file: ${filePath}
1097
+ Supported formats: ${ADAPTERS.map((a) => a.name).join(", ")}`
1098
+ );
1099
+ }
1100
+ const session = await adapter.parse(filePath);
1101
+ assertSessionHasUsableTurns(session, filePath);
1102
+ return session;
1103
+ }
1104
+ function assertSessionHasUsableTurns(session, filePath) {
1105
+ const hasUsableTurn = session.turns.some(
1106
+ (t) => (t.role === "user" || t.role === "assistant") && t.content.trim().length > 0
1107
+ );
1108
+ if (!hasUsableTurn) {
1109
+ throw new Error(
1110
+ `${session.adapter} session has no usable turns: ${filePath}
1111
+ The file matches the ${session.adapter} export shape, but no user or assistant message with content was found. Re-export the session or delete the file if it is empty.`
1112
+ );
1113
+ }
1114
+ }
1115
+ function formatSessionAsMarkdown(session) {
1116
+ const lines = [];
1117
+ for (const turn of session.turns) {
1118
+ const label = turn.role === "user" ? "User" : session.participantIdentity ?? "Assistant";
1119
+ const heading = turn.timestamp ? `### ${label} _(${turn.timestamp})_` : `### ${label}`;
1120
+ lines.push(heading);
1121
+ lines.push("");
1122
+ lines.push(turn.content);
1123
+ lines.push("");
1124
+ }
1125
+ return lines.join("\n").trimEnd();
1126
+ }
1127
+
1128
+ // src/commands/ingest-session.ts
1129
+ function buildSessionFrontmatter(session, sourcePath) {
1130
+ const meta = {
1131
+ title: session.title,
1132
+ source: sourcePath,
1133
+ adapter: session.adapter,
1134
+ ingestedAt: (/* @__PURE__ */ new Date()).toISOString()
1135
+ };
1136
+ if (session.startedAt) meta.sessionStartedAt = session.startedAt;
1137
+ if (session.endedAt) meta.sessionEndedAt = session.endedAt;
1138
+ if (session.participantIdentity) meta.participant = session.participantIdentity;
1139
+ return buildFrontmatter(meta);
1140
+ }
1141
+ async function saveSessionSource(session, sourcePath) {
1142
+ const frontmatter = buildSessionFrontmatter(session, sourcePath);
1143
+ const body = formatSessionAsMarkdown(session);
1144
+ const document = `${frontmatter}
1145
+
1146
+ ${body}
1147
+ `;
1148
+ return saveSource(session.title, document, sourcePath);
1149
+ }
1150
+ async function ingestSessionFile(filePath) {
1151
+ status("*", info(`Ingesting session: ${filePath}`));
1152
+ const session = await parseSessionFile(filePath);
1153
+ const savedPath = await saveSessionSource(session, filePath);
1154
+ status(
1155
+ "+",
1156
+ success(
1157
+ `Saved ${bold(path12.basename(savedPath))} [${session.adapter}] \u2192 ${source(savedPath)}`
1158
+ )
1159
+ );
1160
+ return {
1161
+ filename: path12.basename(savedPath),
1162
+ adapter: session.adapter,
1163
+ title: session.title,
1164
+ source: filePath
1165
+ };
1166
+ }
1167
+ async function listDirectoryFiles(dirPath) {
1168
+ const entries = await readdir(dirPath);
1169
+ const files = [];
1170
+ for (const entry of entries) {
1171
+ const full = path12.join(dirPath, entry);
1172
+ const info2 = await stat(full);
1173
+ if (info2.isFile()) files.push(full);
1174
+ }
1175
+ return files;
1176
+ }
1177
+ async function ingestDirectory(dirPath) {
1178
+ const files = await listDirectoryFiles(dirPath);
1179
+ if (files.length === 0) {
1180
+ throw new Error(`No files found in directory: ${dirPath}`);
1181
+ }
1182
+ status("*", info(`Scanning ${files.length} file(s) in: ${dirPath}`));
1183
+ let imported = 0;
1184
+ let skipped = 0;
1185
+ for (const file of files) {
1186
+ try {
1187
+ await ingestSessionFile(file);
1188
+ imported++;
1189
+ } catch (err) {
1190
+ const message = err instanceof Error ? err.message : String(err);
1191
+ status("!", warn(`Skipped ${path12.basename(file)}: ${message}`));
1192
+ skipped++;
1193
+ }
1194
+ }
1195
+ if (imported === 0) {
1196
+ throw new Error(
1197
+ `No sessions imported from ${dirPath} (${skipped} file(s) skipped). Check that at least one file is in a supported session format.`
1198
+ );
1199
+ }
1200
+ status(
1201
+ "\u2192",
1202
+ dim(`Imported ${imported} session(s), skipped ${skipped}.`)
1203
+ );
1204
+ }
1205
+ async function ingestSession(targetPath) {
1206
+ const info2 = await stat(targetPath).catch(() => {
1207
+ throw new Error(`Path not found: ${targetPath}`);
1208
+ });
1209
+ if (info2.isDirectory()) {
1210
+ await ingestDirectory(targetPath);
1211
+ } else {
1212
+ await ingestSessionFile(targetPath);
1213
+ }
1214
+ status("\u2192", dim("Next: llmwiki compile"));
1215
+ }
1216
+
824
1217
  // src/commands/compile.ts
825
1218
  import { existsSync as existsSync7 } from "fs";
826
1219
 
827
1220
  // src/compiler/index.ts
828
- import { readFile as readFile14 } from "fs/promises";
829
- import path21 from "path";
1221
+ import { readFile as readFile18 } from "fs/promises";
1222
+ import path26 from "path";
830
1223
 
831
1224
  // src/utils/state.ts
832
- import { readFile as readFile7, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
1225
+ import { readFile as readFile11, writeFile as writeFile3, rename as rename2, mkdir as mkdir3, copyFile } from "fs/promises";
833
1226
  import { existsSync } from "fs";
834
- import path8 from "path";
1227
+ import path13 from "path";
835
1228
  function emptyState() {
836
1229
  return { version: 1, indexHash: "", sources: {} };
837
1230
  }
838
1231
  async function readState(root) {
839
- const filePath = path8.join(root, STATE_FILE);
1232
+ const filePath = path13.join(root, STATE_FILE);
840
1233
  if (!existsSync(filePath)) {
841
1234
  return emptyState();
842
1235
  }
843
1236
  try {
844
- const raw = await readFile7(filePath, "utf-8");
1237
+ const raw = await readFile11(filePath, "utf-8");
845
1238
  return JSON.parse(raw);
846
1239
  } catch {
847
1240
  const bakPath = filePath + ".bak";
@@ -851,9 +1244,9 @@ async function readState(root) {
851
1244
  }
852
1245
  }
853
1246
  async function writeState(root, state) {
854
- const dir = path8.join(root, LLMWIKI_DIR);
1247
+ const dir = path13.join(root, LLMWIKI_DIR);
855
1248
  await mkdir3(dir, { recursive: true });
856
- const filePath = path8.join(root, STATE_FILE);
1249
+ const filePath = path13.join(root, STATE_FILE);
857
1250
  const tmpPath = filePath + ".tmp";
858
1251
  await writeFile3(tmpPath, JSON.stringify(state, null, 2), "utf-8");
859
1252
  await rename2(tmpPath, filePath);
@@ -870,18 +1263,18 @@ async function removeSourceState(root, sourceFile) {
870
1263
  }
871
1264
 
872
1265
  // src/compiler/source-state.ts
873
- import path10 from "path";
1266
+ import path15 from "path";
874
1267
 
875
1268
  // src/compiler/hasher.ts
876
- import { createHash } from "crypto";
877
- import { readFile as readFile8, readdir } from "fs/promises";
878
- import path9 from "path";
1269
+ import { createHash as createHash2 } from "crypto";
1270
+ import { readFile as readFile12, readdir as readdir2 } from "fs/promises";
1271
+ import path14 from "path";
879
1272
  async function hashFile(filePath) {
880
- const content = await readFile8(filePath, "utf-8");
881
- return createHash("sha256").update(content).digest("hex");
1273
+ const content = await readFile12(filePath, "utf-8");
1274
+ return createHash2("sha256").update(content).digest("hex");
882
1275
  }
883
1276
  async function detectChanges(root, prevState) {
884
- const sourcesPath = path9.join(root, SOURCES_DIR);
1277
+ const sourcesPath = path14.join(root, SOURCES_DIR);
885
1278
  const currentFiles = await listSourceFiles(sourcesPath);
886
1279
  const changes = [];
887
1280
  for (const file of currentFiles) {
@@ -894,14 +1287,14 @@ async function detectChanges(root, prevState) {
894
1287
  }
895
1288
  async function listSourceFiles(sourcesPath) {
896
1289
  try {
897
- const entries = await readdir(sourcesPath);
1290
+ const entries = await readdir2(sourcesPath);
898
1291
  return entries.filter((f) => f.endsWith(".md"));
899
1292
  } catch {
900
1293
  return [];
901
1294
  }
902
1295
  }
903
1296
  async function classifyFile(root, file, prevState) {
904
- const filePath = path9.join(root, SOURCES_DIR, file);
1297
+ const filePath = path14.join(root, SOURCES_DIR, file);
905
1298
  const hash = await hashFile(filePath);
906
1299
  const prev = prevState.sources[file];
907
1300
  if (!prev) return "new";
@@ -924,7 +1317,7 @@ async function buildExtractionSourceStates(root, extractions) {
924
1317
  return snapshot;
925
1318
  }
926
1319
  async function buildEntry(root, result, compiledAt) {
927
- const filePath = path10.join(root, SOURCES_DIR, result.sourceFile);
1320
+ const filePath = path15.join(root, SOURCES_DIR, result.sourceFile);
928
1321
  const hash = await hashFile(filePath);
929
1322
  return {
930
1323
  hash,
@@ -1162,8 +1555,8 @@ async function callClaude(options) {
1162
1555
  }
1163
1556
 
1164
1557
  // src/utils/lock.ts
1165
- import { open, readFile as readFile9, unlink, mkdir as mkdir4 } from "fs/promises";
1166
- import path11 from "path";
1558
+ import { open, readFile as readFile13, unlink, mkdir as mkdir4 } from "fs/promises";
1559
+ import path16 from "path";
1167
1560
  var RECLAIM_SUFFIX = ".reclaim";
1168
1561
  var MAX_ACQUIRE_ATTEMPTS = 2;
1169
1562
  function isProcessAlive(pid) {
@@ -1175,8 +1568,8 @@ function isProcessAlive(pid) {
1175
1568
  }
1176
1569
  }
1177
1570
  async function acquireLock(root) {
1178
- const lockPath = path11.join(root, LOCK_FILE);
1179
- await mkdir4(path11.join(root, LLMWIKI_DIR), { recursive: true });
1571
+ const lockPath = path16.join(root, LOCK_FILE);
1572
+ await mkdir4(path16.join(root, LLMWIKI_DIR), { recursive: true });
1180
1573
  for (let attempt = 0; attempt < MAX_ACQUIRE_ATTEMPTS; attempt++) {
1181
1574
  const created = await tryCreateLock(lockPath);
1182
1575
  if (created) return true;
@@ -1239,7 +1632,7 @@ async function tryCreateLock(lockPath) {
1239
1632
  }
1240
1633
  async function isLockStale(lockPath) {
1241
1634
  try {
1242
- const content = await readFile9(lockPath, "utf-8");
1635
+ const content = await readFile13(lockPath, "utf-8");
1243
1636
  const pid = parseInt(content.trim(), 10);
1244
1637
  if (isNaN(pid)) return true;
1245
1638
  return !isProcessAlive(pid);
@@ -1248,14 +1641,32 @@ async function isLockStale(lockPath) {
1248
1641
  }
1249
1642
  }
1250
1643
  async function releaseLock(root) {
1251
- const lockPath = path11.join(root, LOCK_FILE);
1644
+ const lockPath = path16.join(root, LOCK_FILE);
1252
1645
  try {
1253
1646
  await unlink(lockPath);
1254
1647
  } catch {
1255
1648
  }
1256
1649
  }
1257
1650
 
1651
+ // src/utils/output-language.ts
1652
var LANG_ENV_VAR = "LLMWIKI_OUTPUT_LANG";
/**
 * Reads the optional output-language setting from the environment.
 *
 * @returns {string | null} The trimmed value of the `LLMWIKI_OUTPUT_LANG`
 *   environment variable, or `null` when it is unset, empty or whitespace-only.
 */
function getOutputLanguage() {
  const configured = (process.env[LANG_ENV_VAR] ?? "").trim();
  return configured === "" ? null : configured;
}
1659
/**
 * Builds the prompt sentence that asks the model to answer in the configured
 * output language.
 *
 * @returns {string} `"Write the output in <lang>."` when `getOutputLanguage()`
 *   yields a value, otherwise the empty string.
 */
function languageDirective() {
  const target = getOutputLanguage();
  return target ? `Write the output in ${target}.` : "";
}
1664
+
1258
1665
  // src/compiler/prompts.ts
1666
/**
 * Appends the language directive (when one is configured) to a list of
 * prompt lines.
 *
 * @param {...string} lines - Prompt lines to pass through.
 * @returns {string[]} `lines` unchanged when `languageDirective()` is empty,
 *   otherwise a new array with the directive added as the last element.
 */
function withLangLine(...lines) {
  const directive = languageDirective();
  if (!directive) return lines;
  return lines.concat(directive);
}
1259
1670
  var PROVENANCE_STATE_VALUES = [
1260
1671
  "extracted",
1261
1672
  "merged",
@@ -1310,10 +1721,6 @@ var CONCEPT_EXTRACTION_TOOL = {
1310
1721
  required: ["slug"]
1311
1722
  },
1312
1723
  description: "Slugs of other concepts whose evidence contradicts this one."
1313
- },
1314
- inferred_paragraphs: {
1315
- type: "integer",
1316
- description: "Estimated number of paragraphs in the page that will be inferred rather than directly cited."
1317
1724
  }
1318
1725
  },
1319
1726
  required: ["concept", "summary", "is_new"]
@@ -1330,11 +1737,13 @@ Here is the existing wiki index \u2014 avoid duplicating concepts already covere
1330
1737
 
1331
1738
  ${existingIndex}` : "\n\nNo existing wiki pages yet.";
1332
1739
  return [
1333
- "You are a knowledge extraction engine. Analyze the following source document",
1334
- "and identify 3-8 distinct, meaningful concepts worth documenting as wiki pages.",
1335
- "Each concept should be a standalone topic that someone might look up.",
1336
- "Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
1337
- "Use the extract_concepts tool to return your findings.",
1740
+ ...withLangLine(
1741
+ "You are a knowledge extraction engine. Analyze the following source document",
1742
+ "and identify 3-8 distinct, meaningful concepts worth documenting as wiki pages.",
1743
+ "Each concept should be a standalone topic that someone might look up.",
1744
+ "Focus on key ideas, techniques, patterns, or entities \u2014 not trivial details.",
1745
+ "Use the extract_concepts tool to return your findings."
1746
+ ),
1338
1747
  "",
1339
1748
  "For every concept, emit provenance metadata so downstream tools can reason",
1340
1749
  "about reliability:",
@@ -1344,8 +1753,6 @@ ${existingIndex}` : "\n\nNo existing wiki pages yet.";
1344
1753
  " or 'ambiguous' if the source is contradictory or unclear.",
1345
1754
  " - contradicted_by: slugs of other concepts (in this batch or the index)",
1346
1755
  " whose evidence conflicts with this one.",
1347
- " - inferred_paragraphs: estimated number of paragraphs in the resulting",
1348
- " page that will be inferred rather than directly citable.",
1349
1756
  indexSection,
1350
1757
  "\n\n--- SOURCE DOCUMENT ---\n\n",
1351
1758
  sourceContent
@@ -1363,11 +1770,13 @@ Related wiki pages for cross-referencing:
1363
1770
 
1364
1771
  ${relatedPages}` : "";
1365
1772
  return [
1366
- `You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
1367
- "Draw facts only from the provided source material.",
1368
- "Include a ## Sources section at the end listing the source document.",
1369
- "Suggest [[wikilinks]] to related concepts where appropriate.",
1370
- "Write in a neutral, informative tone. Be concise but thorough.",
1773
+ ...withLangLine(
1774
+ `You are a wiki author. Write a clear, well-structured markdown page about "${concept}".`,
1775
+ "Draw facts only from the provided source material.",
1776
+ "Include a ## Sources section at the end listing the source document.",
1777
+ "Suggest [[wikilinks]] to related concepts where appropriate.",
1778
+ "Write in a neutral, informative tone. Be concise but thorough."
1779
+ ),
1371
1780
  "",
1372
1781
  "Source attribution: at the end of each prose paragraph, append a citation",
1373
1782
  "marker showing which source file(s) the paragraph drew from.",
@@ -1384,7 +1793,7 @@ ${relatedPages}` : "";
1384
1793
  "",
1385
1794
  "If a paragraph is your inference rather than a direct extraction, leave it",
1386
1795
  "uncited \u2014 downstream lint rules will count uncited paragraphs as 'inferred'",
1387
- "to compute the page's provenance metadata.",
1796
+ "so lint can surface excess-inferred-paragraphs warnings on review.",
1388
1797
  existingSection,
1389
1798
  relatedSection,
1390
1799
  "\n\n--- SOURCE MATERIAL ---\n\n",
@@ -1416,20 +1825,21 @@ function mapRawConcept(c) {
1416
1825
  tags: Array.isArray(c.tags) ? c.tags : void 0,
1417
1826
  confidence: typeof c.confidence === "number" ? c.confidence : void 0,
1418
1827
  provenanceState: provenance,
1419
- contradictedBy: coerceContradictedBy(c.contradicted_by),
1420
- inferredParagraphs: typeof c.inferred_paragraphs === "number" && Number.isInteger(c.inferred_paragraphs) && c.inferred_paragraphs >= 0 ? c.inferred_paragraphs : void 0
1828
+ contradictedBy: coerceContradictedBy(c.contradicted_by)
1421
1829
  };
1422
1830
  }
1423
1831
  function buildSeedPagePrompt(seed, rule, relatedPagesContent) {
1424
1832
  const minLinks = rule.minWikilinks;
1425
1833
  const linkExpectation = minLinks > 0 ? `Include at least ${minLinks} [[wikilinks]] to related pages.` : "Use [[wikilinks]] when referencing other pages.";
1426
1834
  return [
1427
- `You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
1428
- `Page-kind guidance: ${rule.description}`,
1429
- `Summary line for context: ${seed.summary}`,
1430
- "Draw facts only from the related wiki pages provided below.",
1431
- linkExpectation,
1432
- "Write in a neutral, informative tone. Be concise but thorough.",
1835
+ ...withLangLine(
1836
+ `You are a wiki author. Write a ${seed.kind} page titled "${seed.title}".`,
1837
+ `Page-kind guidance: ${rule.description}`,
1838
+ `Summary line for context: ${seed.summary}`,
1839
+ "Draw facts only from the related wiki pages provided below.",
1840
+ linkExpectation,
1841
+ "Write in a neutral, informative tone. Be concise but thorough."
1842
+ ),
1433
1843
  "\n\n--- RELATED PAGES ---\n\n",
1434
1844
  relatedPagesContent
1435
1845
  ].join("\n");
@@ -1491,8 +1901,8 @@ function buildDefaultSchema() {
1491
1901
 
1492
1902
  // src/schema/loader.ts
1493
1903
  import { existsSync as existsSync2 } from "fs";
1494
- import { readFile as readFile10 } from "fs/promises";
1495
- import path12 from "path";
1904
+ import { readFile as readFile14 } from "fs/promises";
1905
+ import path17 from "path";
1496
1906
  import yaml2 from "js-yaml";
1497
1907
  var SCHEMA_CANDIDATE_PATHS = [
1498
1908
  ".llmwiki/schema.json",
@@ -1503,7 +1913,7 @@ var SCHEMA_CANDIDATE_PATHS = [
1503
1913
  ];
1504
1914
  function findSchemaPath(root) {
1505
1915
  for (const candidate of SCHEMA_CANDIDATE_PATHS) {
1506
- const absolute = path12.join(root, candidate);
1916
+ const absolute = path17.join(root, candidate);
1507
1917
  if (existsSync2(absolute)) return absolute;
1508
1918
  }
1509
1919
  return null;
@@ -1556,12 +1966,12 @@ async function loadSchema(root) {
1556
1966
  const defaults = buildDefaultSchema();
1557
1967
  const schemaPath = findSchemaPath(root);
1558
1968
  if (!schemaPath) return defaults;
1559
- const raw = await readFile10(schemaPath, "utf-8");
1969
+ const raw = await readFile14(schemaPath, "utf-8");
1560
1970
  const parsed = parseSchemaFile(schemaPath, raw);
1561
1971
  return applyOverrides(defaults, parsed, schemaPath);
1562
1972
  }
1563
1973
  function defaultSchemaInitPath(root) {
1564
- return path12.join(root, SCHEMA_CANDIDATE_PATHS[0]);
1974
+ return path17.join(root, SCHEMA_CANDIDATE_PATHS[0]);
1565
1975
  }
1566
1976
 
1567
1977
  // src/schema/helpers.ts
@@ -1733,7 +2143,7 @@ async function freezeFailedExtractions(root, results, frozenSlugs) {
1733
2143
  }
1734
2144
 
1735
2145
  // src/compiler/orphan.ts
1736
- import path13 from "path";
2146
+ import path18 from "path";
1737
2147
  async function markOrphaned(root, sourceFile, state) {
1738
2148
  const sourceEntry = state.sources[sourceFile];
1739
2149
  if (!sourceEntry) return;
@@ -1759,7 +2169,7 @@ async function orphanUnownedFrozenPages(root, frozenSlugs) {
1759
2169
  }
1760
2170
  }
1761
2171
  async function orphanPage(root, slug, reason) {
1762
- const pagePath = path13.join(root, CONCEPTS_DIR, `${slug}.md`);
2172
+ const pagePath = path18.join(root, CONCEPTS_DIR, `${slug}.md`);
1763
2173
  const content = await safeReadFile(pagePath);
1764
2174
  if (!content) return;
1765
2175
  const { meta } = parseFrontmatter(content);
@@ -1770,18 +2180,18 @@ async function orphanPage(root, slug, reason) {
1770
2180
  }
1771
2181
 
1772
2182
  // src/compiler/resolver.ts
1773
- import { readdir as readdir2, readFile as readFile11 } from "fs/promises";
1774
- import path14 from "path";
2183
+ import { readdir as readdir3, readFile as readFile15 } from "fs/promises";
2184
+ import path19 from "path";
1775
2185
  import { existsSync as existsSync3 } from "fs";
1776
2186
  async function buildTitleIndex(root) {
1777
- const conceptsDir = path14.join(root, CONCEPTS_DIR);
2187
+ const conceptsDir = path19.join(root, CONCEPTS_DIR);
1778
2188
  if (!existsSync3(conceptsDir)) return [];
1779
- const files = await readdir2(conceptsDir);
2189
+ const files = await readdir3(conceptsDir);
1780
2190
  const pages = [];
1781
2191
  for (const file of files) {
1782
2192
  if (!file.endsWith(".md")) continue;
1783
- const filePath = path14.join(conceptsDir, file);
1784
- const content = await readFile11(filePath, "utf-8");
2193
+ const filePath = path19.join(conceptsDir, file);
2194
+ const content = await readFile15(filePath, "utf-8");
1785
2195
  const { meta } = parseFrontmatter(content);
1786
2196
  if (meta.title && typeof meta.title === "string" && !meta.orphaned) {
1787
2197
  pages.push({
@@ -1867,7 +2277,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
1867
2277
  let count = 0;
1868
2278
  for (const page of titleIndex) {
1869
2279
  if (newSlugs.includes(page.slug)) continue;
1870
- const content = await readFile11(page.filePath, "utf-8");
2280
+ const content = await readFile15(page.filePath, "utf-8");
1871
2281
  const { body } = parseFrontmatter(content);
1872
2282
  const linked = addWikilinks(body, newTitles, page.title);
1873
2283
  if (linked !== body) {
@@ -1879,7 +2289,7 @@ async function resolveInboundLinks(titleIndex, newSlugs) {
1879
2289
  return count;
1880
2290
  }
1881
2291
  async function linkPage(page, titleIndex) {
1882
- const content = await readFile11(page.filePath, "utf-8");
2292
+ const content = await readFile15(page.filePath, "utf-8");
1883
2293
  const { body } = parseFrontmatter(content);
1884
2294
  const linked = addWikilinks(body, titleIndex, page.title);
1885
2295
  if (linked === body) return false;
@@ -1889,18 +2299,18 @@ async function linkPage(page, titleIndex) {
1889
2299
  }
1890
2300
 
1891
2301
  // src/compiler/indexgen.ts
1892
- import { readdir as readdir3 } from "fs/promises";
1893
- import path15 from "path";
2302
+ import { readdir as readdir4 } from "fs/promises";
2303
+ import path20 from "path";
1894
2304
  async function generateIndex(root) {
1895
2305
  status("*", info("Generating index..."));
1896
- const conceptsPath = path15.join(root, CONCEPTS_DIR);
1897
- const queriesPath = path15.join(root, QUERIES_DIR);
2306
+ const conceptsPath = path20.join(root, CONCEPTS_DIR);
2307
+ const queriesPath = path20.join(root, QUERIES_DIR);
1898
2308
  const concepts = await collectPageSummaries(conceptsPath);
1899
2309
  const queries = await collectPageSummaries(queriesPath);
1900
2310
  concepts.sort((a, b) => a.title.localeCompare(b.title));
1901
2311
  queries.sort((a, b) => a.title.localeCompare(b.title));
1902
2312
  const indexContent = buildIndexContent(concepts, queries);
1903
- const indexPath = path15.join(root, INDEX_FILE);
2313
+ const indexPath = path20.join(root, INDEX_FILE);
1904
2314
  await atomicWrite(indexPath, indexContent);
1905
2315
  const total = concepts.length + queries.length;
1906
2316
  status("+", success(`Index updated with ${total} pages.`));
@@ -1908,13 +2318,13 @@ async function generateIndex(root) {
1908
2318
  async function scanWikiPages(dirPath) {
1909
2319
  let files;
1910
2320
  try {
1911
- files = await readdir3(dirPath);
2321
+ files = await readdir4(dirPath);
1912
2322
  } catch {
1913
2323
  return [];
1914
2324
  }
1915
2325
  const scanned = [];
1916
2326
  for (const file of files.filter((f) => f.endsWith(".md"))) {
1917
- const content = await safeReadFile(path15.join(dirPath, file));
2327
+ const content = await safeReadFile(path20.join(dirPath, file));
1918
2328
  const { meta } = parseFrontmatter(content);
1919
2329
  scanned.push({ slug: file.replace(/\.md$/, ""), meta });
1920
2330
  }
@@ -1949,9 +2359,45 @@ function buildIndexContent(concepts, queries) {
1949
2359
  return lines.join("\n");
1950
2360
  }
1951
2361
 
2362
+ // src/compiler/prompt-budget.ts
2363
+ var TRUNCATION_MARKER = "\n\n[\u2026truncated for prompt budget \u2014 see #39\u2026]";
2364
/**
 * Resolves the prompt budget (in characters) from the environment variable
 * named by `PROMPT_BUDGET_ENV_VAR`, falling back to
 * `DEFAULT_PROMPT_BUDGET_CHARS`.
 *
 * The value must be a plain non-negative decimal number, optionally with a
 * fractional part and/or exponent (e.g. `"200000"`, `"1e6"`). Anything else
 * (unset, empty, trailing junk, zero, negative, non-finite) yields the default.
 *
 * @returns {number} A positive integer number of characters.
 */
function resolvePromptBudgetChars() {
  const raw = process.env[PROMPT_BUDGET_ENV_VAR];
  if (!raw) return DEFAULT_PROMPT_BUDGET_CHARS;
  const text = raw.trim();
  // Validate the whole string instead of relying on parseInt, which stops at
  // the first non-digit: "1e6" would otherwise become a 1-character budget and
  // "12abc" would silently be accepted as 12.
  if (!/^\d+(?:\.\d+)?(?:e\+?\d+)?$/i.test(text)) return DEFAULT_PROMPT_BUDGET_CHARS;
  const parsed = Math.floor(Number(text));
  if (!Number.isSafeInteger(parsed) || parsed <= 0) return DEFAULT_PROMPT_BUDGET_CHARS;
  return parsed;
}
2371
/**
 * Formats the source slices for a concept into one prompt section, truncating
 * each oversized slice when their combined length exceeds the prompt budget.
 *
 * When the combined raw length fits the budget the slices are formatted as-is.
 * Otherwise the budget is split evenly across the slices (at least one
 * character each), a truncation warning is emitted, and every slice longer
 * than its share is cut and suffixed with `TRUNCATION_MARKER`. The marker is
 * added on top of the per-source share, so the result can still end up
 * somewhat longer than the budget.
 *
 * @param {string} concept - Concept name, used only in the warning.
 * @param {Array<{file: string, content: string}>} slices - Source slices.
 * @returns {string} The output of `formatSlices` for the (possibly trimmed) slices.
 */
function buildBudgetedCombinedContent(concept, slices) {
  const budget = resolvePromptBudgetChars();
  const totalRaw = slices.reduce((sum, s) => sum + s.content.length, 0);
  if (totalRaw <= budget) {
    return formatSlices(slices);
  }
  const perSource = Math.max(1, Math.floor(budget / slices.length));
  warnTruncation(concept, totalRaw, slices.length, perSource, budget);
  // Never cut between the two halves of a UTF-16 surrogate pair: if the last
  // kept code unit is a high surrogate, drop it so no lone surrogate reaches
  // the prompt.
  const cutIndex = (text) => {
    const lastKept = text.charCodeAt(perSource - 1);
    const endsOnHighSurrogate = lastKept >= 55296 && lastKept <= 56319;
    return endsOnHighSurrogate ? perSource - 1 : perSource;
  };
  const trimmed = slices.map((s) => {
    if (s.content.length <= perSource) return s;
    return { ...s, content: s.content.slice(0, cutIndex(s.content)) + TRUNCATION_MARKER };
  });
  return formatSlices(trimmed);
}
2384
/**
 * Renders source slices as labelled sections for inclusion in a prompt.
 *
 * @param {Array<{file: string, content: string}>} slices - Slices to render.
 * @returns {string} One `--- SOURCE: <file> ---` header per slice, followed by
 *   a blank line and the slice content; sections are separated by a blank line.
 */
function formatSlices(slices) {
  const sections = [];
  for (const slice of slices) {
    sections.push(`--- SOURCE: ${slice.file} ---\n\n${slice.content}`);
  }
  return sections.join("\n\n");
}
2389
/**
 * Emits a `"!"` status line warning that a concept's combined source content
 * exceeded the prompt budget and is being truncated per source.
 *
 * @param {string} concept - Concept whose sources are being truncated.
 * @param {number} totalRaw - Combined character count before truncation.
 * @param {number} sourceCount - Number of sources contributing content.
 * @param {number} perSource - Character allowance applied to each source.
 * @param {number} budget - The overall prompt budget in characters.
 * @returns {void}
 */
function warnTruncation(concept, totalRaw, sourceCount, perSource, budget) {
  const total = totalRaw.toLocaleString();
  const limit = budget.toLocaleString();
  const share = perSource.toLocaleString();
  const message = `Combined source content for "${concept}" (${total} chars across ${sourceCount} sources) exceeds the ${limit}-char prompt budget; truncating each source to ~${share} chars. Raise via ${PROMPT_BUDGET_ENV_VAR} when running against larger-context models.`;
  status("!", warn(message));
}
2397
+
1952
2398
  // src/compiler/obsidian.ts
1953
- import { readdir as readdir4 } from "fs/promises";
1954
- import path16 from "path";
2399
+ import { readdir as readdir5 } from "fs/promises";
2400
+ import path21 from "path";
1955
2401
  var ABBREVIATION_MIN_WORDS = 3;
1956
2402
  var SWAP_CONJUNCTIONS = [" and ", " or "];
1957
2403
  function addObsidianMeta(frontmatter, conceptTitle, tags) {
@@ -1993,23 +2439,23 @@ function generateAbbreviation(title) {
1993
2439
  return abbreviation;
1994
2440
  }
1995
2441
  async function generateMOC(root) {
1996
- const conceptsPath = path16.join(root, CONCEPTS_DIR);
2442
+ const conceptsPath = path21.join(root, CONCEPTS_DIR);
1997
2443
  const pages = await loadConceptPages(conceptsPath);
1998
2444
  const tagGroups = groupPagesByTag(pages);
1999
2445
  const content = buildMOCContent(tagGroups);
2000
- await atomicWrite(path16.join(root, MOC_FILE), content);
2446
+ await atomicWrite(path21.join(root, MOC_FILE), content);
2001
2447
  }
2002
2448
  async function loadConceptPages(conceptsPath) {
2003
2449
  let files;
2004
2450
  try {
2005
- files = await readdir4(conceptsPath);
2451
+ files = await readdir5(conceptsPath);
2006
2452
  } catch {
2007
2453
  return [];
2008
2454
  }
2009
2455
  const pages = [];
2010
2456
  for (const file of files) {
2011
2457
  if (!file.endsWith(".md")) continue;
2012
- const content = await safeReadFile(path16.join(conceptsPath, file));
2458
+ const content = await safeReadFile(path21.join(conceptsPath, file));
2013
2459
  if (!content) continue;
2014
2460
  const { meta } = parseFrontmatter(content);
2015
2461
  if (meta.orphaned) continue;
@@ -2060,14 +2506,14 @@ function buildMOCContent(tagGroups) {
2060
2506
  }
2061
2507
 
2062
2508
  // src/utils/embeddings.ts
2063
- import { readFile as readFile12, readdir as readdir5 } from "fs/promises";
2509
+ import { readFile as readFile16, readdir as readdir6 } from "fs/promises";
2064
2510
  import { existsSync as existsSync4 } from "fs";
2065
- import path17 from "path";
2511
+ import path22 from "path";
2066
2512
 
2067
2513
  // src/utils/retrieval.ts
2068
- import { createHash as createHash2 } from "crypto";
2514
+ import { createHash as createHash3 } from "crypto";
2069
2515
  function hashChunkText(text) {
2070
- return createHash2("sha256").update(text, "utf8").digest("hex").slice(0, 16);
2516
+ return createHash3("sha256").update(text, "utf8").digest("hex").slice(0, 16);
2071
2517
  }
2072
2518
  function splitIntoChunks(body) {
2073
2519
  const paragraphs = extractParagraphs(body);
@@ -2227,13 +2673,13 @@ function findTopKChunks(queryVec, chunks, k) {
2227
2673
  return scored.slice(0, k);
2228
2674
  }
2229
2675
  async function readEmbeddingStore(root) {
2230
- const filePath = path17.join(root, EMBEDDINGS_FILE);
2676
+ const filePath = path22.join(root, EMBEDDINGS_FILE);
2231
2677
  if (!existsSync4(filePath)) return null;
2232
- const raw = await readFile12(filePath, "utf-8");
2678
+ const raw = await readFile16(filePath, "utf-8");
2233
2679
  return JSON.parse(raw);
2234
2680
  }
2235
2681
  async function writeEmbeddingStore(root, store) {
2236
- const filePath = path17.join(root, EMBEDDINGS_FILE);
2682
+ const filePath = path22.join(root, EMBEDDINGS_FILE);
2237
2683
  await atomicWrite(filePath, JSON.stringify(store, null, 2));
2238
2684
  }
2239
2685
  async function findRelevantPages(root, question) {
@@ -2265,10 +2711,10 @@ async function loadActiveStore(root, hasContent) {
2265
2711
  async function collectPageRecords(root) {
2266
2712
  const records = [];
2267
2713
  for (const dir of [CONCEPTS_DIR, QUERIES_DIR]) {
2268
- const absDir = path17.join(root, dir);
2714
+ const absDir = path22.join(root, dir);
2269
2715
  let files;
2270
2716
  try {
2271
- files = await readdir5(absDir);
2717
+ files = await readdir6(absDir);
2272
2718
  } catch {
2273
2719
  continue;
2274
2720
  }
@@ -2280,7 +2726,7 @@ async function collectPageRecords(root) {
2280
2726
  return records;
2281
2727
  }
2282
2728
  async function readPageRecord(absDir, file) {
2283
- const content = await safeReadFile(path17.join(absDir, file));
2729
+ const content = await safeReadFile(path22.join(absDir, file));
2284
2730
  const { meta, body } = parseFrontmatter(content);
2285
2731
  if (meta.orphaned || typeof meta.title !== "string") return null;
2286
2732
  return {
@@ -2442,9 +2888,9 @@ function shouldRunEmbedding(modelChanged, toEmbed, previousEntries, previousChun
2442
2888
  }
2443
2889
 
2444
2890
  // src/compiler/candidates.ts
2445
- import { readdir as readdir6, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
2891
+ import { readdir as readdir7, rename as rename3, unlink as unlink2, writeFile as writeFile4, mkdir as mkdir5 } from "fs/promises";
2446
2892
  import { existsSync as existsSync5 } from "fs";
2447
- import path18 from "path";
2893
+ import path23 from "path";
2448
2894
  import { randomBytes } from "crypto";
2449
2895
  var ID_SUFFIX_BYTES = 4;
2450
2896
  var CANDIDATE_EXT = ".json";
@@ -2453,10 +2899,10 @@ function buildCandidateId(slug) {
2453
2899
  return `${slug}-${suffix}`;
2454
2900
  }
2455
2901
  function candidatePath(root, id) {
2456
- return path18.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
2902
+ return path23.join(root, CANDIDATES_DIR, `${id}${CANDIDATE_EXT}`);
2457
2903
  }
2458
2904
  function archivePath(root, id) {
2459
- return path18.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
2905
+ return path23.join(root, CANDIDATES_ARCHIVE_DIR, `${id}${CANDIDATE_EXT}`);
2460
2906
  }
2461
2907
  async function writeCandidate(root, draft) {
2462
2908
  const candidate = {
@@ -2468,7 +2914,8 @@ async function writeCandidate(root, draft) {
2468
2914
  body: draft.body,
2469
2915
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
2470
2916
  ...draft.sourceStates ? { sourceStates: draft.sourceStates } : {},
2471
- ...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {}
2917
+ ...draft.schemaViolations ? { schemaViolations: draft.schemaViolations } : {},
2918
+ ...draft.provenanceViolations ? { provenanceViolations: draft.provenanceViolations } : {}
2472
2919
  };
2473
2920
  await atomicWrite(candidatePath(root, candidate.id), JSON.stringify(candidate, null, 2));
2474
2921
  return candidate;
@@ -2507,9 +2954,9 @@ function isValidCandidate(value) {
2507
2954
  return typeof candidate.id === "string" && typeof candidate.title === "string" && typeof candidate.slug === "string" && typeof candidate.body === "string" && Array.isArray(candidate.sources);
2508
2955
  }
2509
2956
  async function listCandidates(root) {
2510
- const dir = path18.join(root, CANDIDATES_DIR);
2957
+ const dir = path23.join(root, CANDIDATES_DIR);
2511
2958
  if (!existsSync5(dir)) return [];
2512
- const entries = await readdir6(dir, { withFileTypes: true });
2959
+ const entries = await readdir7(dir, { withFileTypes: true });
2513
2960
  const candidates = [];
2514
2961
  for (const entry of entries) {
2515
2962
  if (!entry.isFile() || !entry.name.endsWith(CANDIDATE_EXT)) continue;
@@ -2534,7 +2981,7 @@ async function archiveCandidate(root, id) {
2534
2981
  const sourcePath = candidatePath(root, id);
2535
2982
  if (!existsSync5(sourcePath)) return false;
2536
2983
  const target = archivePath(root, id);
2537
- await mkdir5(path18.dirname(target), { recursive: true });
2984
+ await mkdir5(path23.dirname(target), { recursive: true });
2538
2985
  try {
2539
2986
  await rename3(sourcePath, target);
2540
2987
  } catch {
@@ -2546,9 +2993,9 @@ async function archiveCandidate(root, id) {
2546
2993
  }
2547
2994
 
2548
2995
  // src/linter/rules.ts
2549
- import { readdir as readdir7, readFile as readFile13 } from "fs/promises";
2996
+ import { readdir as readdir8, readFile as readFile17 } from "fs/promises";
2550
2997
  import { existsSync as existsSync6 } from "fs";
2551
- import path19 from "path";
2998
+ import path24 from "path";
2552
2999
  var MIN_BODY_LENGTH = 50;
2553
3000
  var WIKILINK_PATTERN2 = /\[\[([^\]]+)\]\]/g;
2554
3001
  var CITATION_PATTERN = /\^\[([^\]]+)\]/g;
@@ -2565,26 +3012,26 @@ function findMatchesInContent(content, pattern) {
2565
3012
  }
2566
3013
  async function readMarkdownFiles(dirPath) {
2567
3014
  if (!existsSync6(dirPath)) return [];
2568
- const entries = await readdir7(dirPath);
3015
+ const entries = await readdir8(dirPath);
2569
3016
  const mdFiles = entries.filter((f) => f.endsWith(".md"));
2570
3017
  const results = await Promise.all(
2571
3018
  mdFiles.map(async (fileName) => {
2572
- const filePath = path19.join(dirPath, fileName);
2573
- const content = await readFile13(filePath, "utf-8");
3019
+ const filePath = path24.join(dirPath, fileName);
3020
+ const content = await readFile17(filePath, "utf-8");
2574
3021
  return { filePath, content };
2575
3022
  })
2576
3023
  );
2577
3024
  return results;
2578
3025
  }
2579
3026
  async function collectAllPages(root) {
2580
- const conceptPages = await readMarkdownFiles(path19.join(root, CONCEPTS_DIR));
2581
- const queryPages = await readMarkdownFiles(path19.join(root, QUERIES_DIR));
3027
+ const conceptPages = await readMarkdownFiles(path24.join(root, CONCEPTS_DIR));
3028
+ const queryPages = await readMarkdownFiles(path24.join(root, QUERIES_DIR));
2582
3029
  return [...conceptPages, ...queryPages];
2583
3030
  }
2584
3031
  function buildPageSlugSet(pages) {
2585
3032
  const slugs = /* @__PURE__ */ new Set();
2586
3033
  for (const page of pages) {
2587
- const baseName = path19.basename(page.filePath, ".md");
3034
+ const baseName = path24.basename(page.filePath, ".md");
2588
3035
  slugs.add(baseName.toLowerCase());
2589
3036
  }
2590
3037
  return slugs;
@@ -2731,9 +3178,8 @@ async function checkInferredWithoutCitations(root) {
2731
3178
  const pages = await collectAllPages(root);
2732
3179
  const results = [];
2733
3180
  for (const page of pages) {
2734
- const { meta, body } = parseFrontmatter(page.content);
2735
- const provenance = parseProvenanceMetadata(meta);
2736
- const inferred = provenance.inferredParagraphs ?? countUncitedProseParagraphs(body);
3181
+ const { body } = parseFrontmatter(page.content);
3182
+ const inferred = countUncitedProseParagraphs(body);
2737
3183
  if (inferred <= MAX_INFERRED_PARAGRAPHS_WITHOUT_CITATIONS) continue;
2738
3184
  results.push({
2739
3185
  rule: "excess-inferred-paragraphs",
@@ -2744,7 +3190,7 @@ async function checkInferredWithoutCitations(root) {
2744
3190
  }
2745
3191
  return results;
2746
3192
  }
2747
- var PROSE_PARAGRAPH_LEAD = /^[A-Za-z]/;
3193
+ var PROSE_PARAGRAPH_LEAD = new RegExp("^\\p{L}", "u");
2748
3194
  function countUncitedProseParagraphs(body) {
2749
3195
  const paragraphs = body.split(/\n\s*\n/);
2750
3196
  let count = 0;
@@ -2767,18 +3213,7 @@ async function checkSchemaCrossLinks(root, schema) {
2767
3213
  const pages = await collectAllPages(root);
2768
3214
  const results = [];
2769
3215
  for (const page of pages) {
2770
- const { meta, body } = parseFrontmatter(page.content);
2771
- const kind = resolvePageKind(meta.kind, schema);
2772
- const rule = schema.kinds[kind];
2773
- if (rule.minWikilinks <= 0) continue;
2774
- const linkCount = countWikilinks(body);
2775
- if (linkCount >= rule.minWikilinks) continue;
2776
- results.push({
2777
- rule: "schema-cross-link-minimum",
2778
- severity: "warning",
2779
- file: page.filePath,
2780
- message: `Page kind "${kind}" requires at least ${rule.minWikilinks} [[wikilinks]] but only ${linkCount} found.`
2781
- });
3216
+ results.push(...checkPageCrossLinks(page.content, page.filePath, schema));
2782
3217
  }
2783
3218
  return results;
2784
3219
  }
@@ -2819,13 +3254,24 @@ function countLines(content) {
2819
3254
  }
2820
3255
  async function checkBrokenCitations(root) {
2821
3256
  const pages = await collectAllPages(root);
2822
- const sourcesDir = path19.join(root, SOURCES_DIR);
3257
+ const sourcesDir = path24.join(root, SOURCES_DIR);
2823
3258
  const results = [];
2824
3259
  const lineCountCache = /* @__PURE__ */ new Map();
2825
3260
  for (const page of pages) {
2826
- for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
2827
- await collectBrokenForMarker(captured, line, page.filePath, sourcesDir, lineCountCache, results);
2828
- }
3261
+ const pageFindings = await checkPageBrokenCitations(
3262
+ page.content,
3263
+ page.filePath,
3264
+ sourcesDir,
3265
+ lineCountCache
3266
+ );
3267
+ results.push(...pageFindings);
3268
+ }
3269
+ return results;
3270
+ }
3271
/**
 * Runs the broken-citation check for a single page.
 *
 * Every `CITATION_PATTERN` match in `content` is handed, in order and one at
 * a time, to `collectBrokenForMarker`, which appends its findings to the
 * result array.
 *
 * @param {string} content - Full page text.
 * @param {string} filePath - Path of the page, passed through to the marker check.
 * @param {string} sourcesDir - Directory cited source files are resolved against.
 * @param {Map} [lineCountCache] - Cache passed through to the marker check;
 *   a fresh `Map` is used when omitted.
 * @returns {Promise<object[]>} Findings collected for this page.
 */
async function checkPageBrokenCitations(content, filePath, sourcesDir, lineCountCache = /* @__PURE__ */ new Map()) {
  const findings = [];
  const markers = findMatchesInContent(content, CITATION_PATTERN);
  for (const marker of markers) {
    await collectBrokenForMarker(marker.captured, marker.line, filePath, sourcesDir, lineCountCache, findings);
  }
  return findings;
}
@@ -2834,7 +3280,7 @@ async function collectBrokenForMarker(captured, line, pageFile, sourcesDir, line
2834
3280
  const trimmed = part.trim();
2835
3281
  if (trimmed.length === 0) continue;
2836
3282
  const filename = stripSpanSuffix(trimmed);
2837
- const citedPath = path19.join(sourcesDir, filename);
3283
+ const citedPath = path24.join(sourcesDir, filename);
2838
3284
  if (!existsSync6(citedPath)) {
2839
3285
  out.push({
2840
3286
  rule: "broken-citation",
@@ -2870,25 +3316,30 @@ async function checkMalformedClaimCitations(root) {
2870
3316
  const pages = await collectAllPages(root);
2871
3317
  const results = [];
2872
3318
  for (const page of pages) {
2873
- for (const { captured, line } of findMatchesInContent(page.content, CITATION_PATTERN)) {
2874
- for (const part of captured.split(",")) {
2875
- if (!isMalformedCitationEntry(part)) continue;
2876
- results.push({
2877
- rule: "malformed-claim-citation",
2878
- severity: "error",
2879
- file: page.filePath,
2880
- message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
2881
- line
2882
- });
2883
- }
3319
+ results.push(...checkPageMalformedCitations(page.content, page.filePath));
3320
+ }
3321
+ return results;
3322
+ }
3323
/**
 * Reports malformed claim-citation entries found in a single page.
 *
 * Each `CITATION_PATTERN` match is split on commas; every part that
 * `isMalformedCitationEntry` flags yields one `malformed-claim-citation`
 * error carrying the whole captured marker text and its line.
 *
 * @param {string} content - Full page text.
 * @param {string} filePath - Path reported in the `file` field of each finding.
 * @returns {Array<{rule: string, severity: string, file: string, message: string, line: number}>}
 *   Findings in match order; empty when nothing is malformed.
 */
function checkPageMalformedCitations(content, filePath) {
  const findings = [];
  for (const { captured, line } of findMatchesInContent(content, CITATION_PATTERN)) {
    captured.split(",").forEach((part) => {
      if (isMalformedCitationEntry(part)) {
        findings.push({
          rule: "malformed-claim-citation",
          severity: "error",
          file: filePath,
          message: `Malformed claim citation ^[${captured}] \u2014 expected file.md, file.md:N-N, or file.md#LN-LN`,
          line
        });
      }
    });
  }
  return findings;
}
2888
3339
 
2889
3340
  // src/compiler/page-renderer.ts
2890
- import { readdir as readdir8 } from "fs/promises";
2891
- import path20 from "path";
3341
+ import { readdir as readdir9 } from "fs/promises";
3342
+ import path25 from "path";
2892
3343
 
2893
3344
  // src/compiler/provenance.ts
2894
3345
  function addProvenanceMeta(fields, concept) {
@@ -2901,9 +3352,6 @@ function addProvenanceMeta(fields, concept) {
2901
3352
  if (concept.contradictedBy && concept.contradictedBy.length > 0) {
2902
3353
  fields.contradictedBy = concept.contradictedBy;
2903
3354
  }
2904
- if (typeof concept.inferredParagraphs === "number") {
2905
- fields.inferredParagraphs = concept.inferredParagraphs;
2906
- }
2907
3355
  }
2908
3356
  function reportContradictionWarnings(conceptTitle, concept) {
2909
3357
  const refs = concept.contradictedBy;
@@ -2918,7 +3366,7 @@ function reportContradictionWarnings(conceptTitle, concept) {
2918
3366
  // src/compiler/page-renderer.ts
2919
3367
  var RELATED_PAGE_CONTEXT_LIMIT = 5;
2920
3368
  async function renderMergedPageContent(root, entry, schema) {
2921
- const pagePath = path20.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
3369
+ const pagePath = path25.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
2922
3370
  const existingPage = await safeReadFile(pagePath);
2923
3371
  const relatedPages = await loadRelatedPages(root, entry.slug);
2924
3372
  const system = buildPagePrompt(
@@ -2957,17 +3405,17 @@ function buildMergedFrontmatter(entry, existingPage, schema) {
2957
3405
  return buildFrontmatter(frontmatterFields);
2958
3406
  }
2959
3407
  async function loadRelatedPages(root, excludeSlug) {
2960
- const conceptsPath = path20.join(root, CONCEPTS_DIR);
3408
+ const conceptsPath = path25.join(root, CONCEPTS_DIR);
2961
3409
  let files;
2962
3410
  try {
2963
- files = await readdir8(conceptsPath);
3411
+ files = await readdir9(conceptsPath);
2964
3412
  } catch {
2965
3413
  return "";
2966
3414
  }
2967
3415
  const related = files.filter((f) => f.endsWith(".md") && f !== `${excludeSlug}.md`).slice(0, RELATED_PAGE_CONTEXT_LIMIT);
2968
3416
  const contents = [];
2969
3417
  for (const f of related) {
2970
- const content = await safeReadFile(path20.join(conceptsPath, f));
3418
+ const content = await safeReadFile(path25.join(conceptsPath, f));
2971
3419
  if (!content) continue;
2972
3420
  const { meta } = parseFrontmatter(content);
2973
3421
  if (meta.orphaned) continue;
@@ -3021,7 +3469,7 @@ async function generatePagesPhase(root, extractions, frozenSlugs, schema, option
3021
3469
  return entry;
3022
3470
  }))
3023
3471
  );
3024
- return { pages, errors, candidates };
3472
+ return { pages, errors, candidates, seedSlugs: [] };
3025
3473
  }
3026
3474
  async function persistExtractionStates(root, extractions) {
3027
3475
  for (const result of extractions) {
@@ -3047,12 +3495,13 @@ function summarizeCompile(buckets, generation, extractions, options) {
3047
3495
  errors.push(`No concepts extracted from ${result.sourceFile}`);
3048
3496
  }
3049
3497
  }
3498
+ const conceptSlugs = generation.pages.map((entry) => entry.slug);
3050
3499
  const baseResult = {
3051
3500
  compiled: buckets.toCompile.length,
3052
3501
  skipped: buckets.unchanged.length,
3053
3502
  deleted: buckets.deleted.length,
3054
3503
  concepts: generation.pages.map((entry) => entry.concept.concept),
3055
- pages: generation.pages.map((entry) => entry.slug),
3504
+ pages: [...conceptSlugs, ...generation.seedSlugs],
3056
3505
  errors
3057
3506
  };
3058
3507
  if (options.review) {
@@ -3070,12 +3519,21 @@ async function runCompilePipeline(root, options) {
3070
3519
  if (buckets.toCompile.length === 0 && buckets.deleted.length === 0) {
3071
3520
  status("\u2713", success("Nothing to compile \u2014 all sources up to date."));
3072
3521
  if (!options.review) {
3073
- const emptyGeneration = { pages: [], errors: [], candidates: [] };
3522
+ const emptyGeneration = {
3523
+ pages: [],
3524
+ errors: [],
3525
+ candidates: [],
3526
+ seedSlugs: []
3527
+ };
3074
3528
  await generateSeedPages(root, schema, emptyGeneration);
3075
- await finalizeWiki(root, emptyGeneration.pages);
3529
+ await finalizeWiki(root, emptyGeneration.pages, emptyGeneration.seedSlugs);
3076
3530
  return {
3077
3531
  ...emptyCompileResult(),
3078
3532
  skipped: buckets.unchanged.length,
3533
+ // Surface seed-page slugs alongside any errors so downstream
3534
+ // consumers (MCP, embeddings, programmatic callers) can see what
3535
+ // landed even on the no-source-changes early-return path.
3536
+ pages: [...emptyGeneration.seedSlugs],
3079
3537
  errors: emptyGeneration.errors
3080
3538
  };
3081
3539
  }
@@ -3099,7 +3557,7 @@ async function runCompilePipeline(root, options) {
3099
3557
  }
3100
3558
  await persistFrozenSlugs(root, frozenSlugs, extractions);
3101
3559
  await generateSeedPages(root, schema, generation);
3102
- await finalizeWiki(root, generation.pages);
3560
+ await finalizeWiki(root, generation.pages, generation.seedSlugs);
3103
3561
  }
3104
3562
  return summarizeCompile(buckets, generation, extractions, options);
3105
3563
  }
@@ -3136,9 +3594,11 @@ async function runExtractionPhases(root, toCompile, state, allChanges) {
3136
3594
  }
3137
3595
  return extractions;
3138
3596
  }
3139
- async function finalizeWiki(root, pages) {
3140
- const allChangedSlugs = pages.map((entry) => entry.slug);
3141
- const allNewSlugs = pages.filter((entry) => entry.concept.is_new).map((entry) => entry.slug);
3597
+ async function finalizeWiki(root, pages, seedSlugs = []) {
3598
+ const conceptChangedSlugs = pages.map((entry) => entry.slug);
3599
+ const conceptNewSlugs = pages.filter((entry) => entry.concept.is_new).map((entry) => entry.slug);
3600
+ const allChangedSlugs = [...conceptChangedSlugs, ...seedSlugs];
3601
+ const allNewSlugs = [...conceptNewSlugs, ...seedSlugs];
3142
3602
  if (allChangedSlugs.length > 0) {
3143
3603
  status("\u{1F517}", info("Resolving interlinks..."));
3144
3604
  await resolveLinks(root, allChangedSlugs, allNewSlugs);
@@ -3168,9 +3628,9 @@ function printChangesSummary(changes) {
3168
3628
  }
3169
3629
  async function extractForSource(root, sourceFile) {
3170
3630
  status("*", info(`Extracting: ${sourceFile}`));
3171
- const sourcePath = path21.join(root, SOURCES_DIR, sourceFile);
3172
- const sourceContent = await readFile14(sourcePath, "utf-8");
3173
- const existingIndex = await safeReadFile(path21.join(root, INDEX_FILE));
3631
+ const sourcePath = path26.join(root, SOURCES_DIR, sourceFile);
3632
+ const sourceContent = await readFile18(sourcePath, "utf-8");
3633
+ const existingIndex = await safeReadFile(path26.join(root, INDEX_FILE));
3174
3634
  const concepts = await extractConcepts(sourceContent, existingIndex);
3175
3635
  if (concepts.length > 0) {
3176
3636
  const names = concepts.map((c) => c.concept).join(", ");
@@ -3193,13 +3653,11 @@ function reconcileConceptMetadata(existing, incoming) {
3193
3653
  }
3194
3654
  }
3195
3655
  reconciled.contradictedBy = refs.length > 0 ? refs : void 0;
3196
- if (typeof incoming.inferredParagraphs === "number") {
3197
- reconciled.inferredParagraphs = typeof existing.inferredParagraphs === "number" ? Math.max(existing.inferredParagraphs, incoming.inferredParagraphs) : incoming.inferredParagraphs;
3198
- }
3199
3656
  return reconciled;
3200
3657
  }
3201
3658
  function mergeExtractions(extractions, frozenSlugs) {
3202
3659
  const bySlug = /* @__PURE__ */ new Map();
3660
+ const slicesBySlug = /* @__PURE__ */ new Map();
3203
3661
  for (const result of extractions) {
3204
3662
  if (result.concepts.length === 0) continue;
3205
3663
  for (const concept of result.concepts) {
@@ -3209,23 +3667,28 @@ function mergeExtractions(extractions, frozenSlugs) {
3209
3667
  if (existing) {
3210
3668
  existing.concept = reconcileConceptMetadata(existing.concept, concept);
3211
3669
  existing.sourceFiles.push(result.sourceFile);
3212
- existing.combinedContent += `
3213
-
3214
- --- SOURCE: ${result.sourceFile} ---
3215
-
3216
- ${result.sourceContent}`;
3217
3670
  } else {
3218
3671
  bySlug.set(slug, {
3219
3672
  slug,
3220
3673
  concept,
3221
3674
  sourceFiles: [result.sourceFile],
3222
- combinedContent: `--- SOURCE: ${result.sourceFile} ---
3223
-
3224
- ${result.sourceContent}`
3675
+ combinedContent: ""
3225
3676
  });
3677
+ slicesBySlug.set(slug, []);
3226
3678
  }
3679
+ slicesBySlug.get(slug).push({
3680
+ file: result.sourceFile,
3681
+ content: result.sourceContent
3682
+ });
3227
3683
  }
3228
3684
  }
3685
+ for (const merged of bySlug.values()) {
3686
+ const slices = slicesBySlug.get(merged.slug) ?? [];
3687
+ merged.combinedContent = buildBudgetedCombinedContent(
3688
+ merged.concept.concept,
3689
+ slices
3690
+ );
3691
+ }
3229
3692
  return Array.from(bySlug.values());
3230
3693
  }
3231
3694
  async function generateMergedPage(root, entry, schema, options, sourceStates) {
@@ -3233,13 +3696,18 @@ async function generateMergedPage(root, entry, schema, options, sourceStates) {
3233
3696
  if (options.review) {
3234
3697
  return await persistReviewCandidate(root, entry, fullPage, sourceStates, schema);
3235
3698
  }
3236
- const pagePath = path21.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
3699
+ const pagePath = path26.join(root, CONCEPTS_DIR, `${entry.slug}.md`);
3237
3700
  const error2 = await writePageIfValid(pagePath, fullPage, entry.concept.concept);
3238
3701
  return { error: error2 ?? void 0 };
3239
3702
  }
3240
3703
  async function persistReviewCandidate(root, entry, fullPage, sourceStates, schema) {
3241
3704
  const virtualPath = `wiki/concepts/${entry.slug}.md`;
3242
- const violations = checkPageCrossLinks(fullPage, virtualPath, schema);
3705
+ const schemaViolations = checkPageCrossLinks(fullPage, virtualPath, schema);
3706
+ const provenanceViolations = await collectCandidateProvenanceViolations(
3707
+ root,
3708
+ fullPage,
3709
+ virtualPath
3710
+ );
3243
3711
  const candidate = await writeCandidate(root, {
3244
3712
  title: entry.concept.concept,
3245
3713
  slug: entry.slug,
@@ -3247,21 +3715,35 @@ async function persistReviewCandidate(root, entry, fullPage, sourceStates, schem
3247
3715
  sources: entry.sourceFiles,
3248
3716
  body: fullPage,
3249
3717
  sourceStates: pickStatesForSources(sourceStates, entry.sourceFiles),
3250
- schemaViolations: violations.length > 0 ? violations : void 0
3718
+ schemaViolations: schemaViolations.length > 0 ? schemaViolations : void 0,
3719
+ provenanceViolations: provenanceViolations.length > 0 ? provenanceViolations : void 0
3251
3720
  });
3252
3721
  status("?", info(`Candidate ready: ${candidate.id} (${entry.slug})`));
3253
3722
  return { candidateId: candidate.id };
3254
3723
  }
3724
/**
 * Gather every provenance problem found in a review candidate's page text.
 *
 * Runs the malformed-citation check first, then the broken-citation check
 * against the project's sources directory, and returns both result lists
 * concatenated in that order.
 *
 * @param {string} root - Project root directory.
 * @param {string} fullPage - Full page text of the candidate.
 * @param {string} virtualPath - Path used to label the candidate in violations.
 * @returns {Promise<Array>} Malformed-citation violations followed by broken-citation ones.
 */
async function collectCandidateProvenanceViolations(root, fullPage, virtualPath) {
  const malformedCitations = checkPageMalformedCitations(fullPage, virtualPath);
  const sourcesDir = path26.join(root, SOURCES_DIR);
  const brokenCitations = await checkPageBrokenCitations(fullPage, virtualPath, sourcesDir);
  const violations = [];
  violations.push(...malformedCitations, ...brokenCitations);
  return violations;
}
3255
3733
  async function generateSeedPages(root, schema, generation) {
3256
3734
  if (schema.seedPages.length === 0) return;
3257
3735
  for (const seed of schema.seedPages) {
3258
- const error2 = await generateSingleSeedPage(root, schema, seed);
3259
- if (error2) generation.errors.push(error2);
3736
+ const result = await generateSingleSeedPage(root, schema, seed);
3737
+ if (result.error) {
3738
+ generation.errors.push(result.error);
3739
+ continue;
3740
+ }
3741
+ generation.seedSlugs.push(result.slug);
3260
3742
  }
3261
3743
  }
3262
3744
  async function generateSingleSeedPage(root, schema, seed) {
3263
3745
  const slug = slugify(seed.title);
3264
- const pagePath = path21.join(root, CONCEPTS_DIR, `${slug}.md`);
3746
+ const pagePath = path26.join(root, CONCEPTS_DIR, `${slug}.md`);
3265
3747
  const relatedContent = await loadSeedRelatedPages(root, seed.relatedSlugs ?? []);
3266
3748
  const rule = schema.kinds[seed.kind];
3267
3749
  const system = buildSeedPagePrompt(seed, rule, relatedContent);
@@ -3284,16 +3766,17 @@ async function generateSingleSeedPage(root, schema, seed) {
3284
3766
  const frontmatterFields = { ...typedFields };
3285
3767
  addObsidianMeta(frontmatterFields, seed.title, []);
3286
3768
  const frontmatter = buildFrontmatter(frontmatterFields);
3287
- return await writePageIfValid(pagePath, `${frontmatter}
3769
+ const error2 = await writePageIfValid(pagePath, `${frontmatter}
3288
3770
 
3289
3771
  ${pageBody}
3290
3772
  `, seed.title);
3773
+ return error2 ? { slug, error: error2 } : { slug };
3291
3774
  }
3292
3775
  async function loadSeedRelatedPages(root, slugs) {
3293
3776
  if (slugs.length === 0) return "";
3294
3777
  const contents = [];
3295
3778
  for (const slug of slugs) {
3296
- const pagePath = path21.join(root, CONCEPTS_DIR, `${slug}.md`);
3779
+ const pagePath = path26.join(root, CONCEPTS_DIR, `${slug}.md`);
3297
3780
  const content = await safeReadFile(pagePath);
3298
3781
  if (content) contents.push(content);
3299
3782
  }
@@ -3348,7 +3831,7 @@ async function compileCommand(options = {}) {
3348
3831
 
3349
3832
  // src/commands/query.ts
3350
3833
  import { existsSync as existsSync8 } from "fs";
3351
- import path22 from "path";
3834
+ import path27 from "path";
3352
3835
  var PAGE_DIRS = [CONCEPTS_DIR, QUERIES_DIR];
3353
3836
  var PAGE_SELECTION_TOOL = {
3354
3837
  name: "select_pages",
@@ -3405,7 +3888,7 @@ async function selectRelevantPages(root, question, debug) {
3405
3888
  const { pages: rawPages2, reasoning: reasoning2 } = await selectPages(question, filteredIndex);
3406
3889
  return { pages: rawPages2, rawPages: rawPages2, reasoning: reasoning2, chunks: [] };
3407
3890
  }
3408
- const indexContent = await safeReadFile(path22.join(root, INDEX_FILE));
3891
+ const indexContent = await safeReadFile(path27.join(root, INDEX_FILE));
3409
3892
  const { pages: rawPages, reasoning } = await selectPages(question, indexContent);
3410
3893
  return { pages: rawPages.map((p) => slugify(p)), rawPages, reasoning, chunks: [] };
3411
3894
  }
@@ -3497,7 +3980,7 @@ async function loadSelectedPages(root, slugs) {
3497
3980
  for (const slug of slugs) {
3498
3981
  let content = "";
3499
3982
  for (const dir of PAGE_DIRS) {
3500
- const candidate = await safeReadFile(path22.join(root, dir, `${slug}.md`));
3983
+ const candidate = await safeReadFile(path27.join(root, dir, `${slug}.md`));
3501
3984
  if (!candidate) continue;
3502
3985
  const { meta } = parseFrontmatter(candidate);
3503
3986
  if (meta.orphaned) continue;
@@ -3513,7 +3996,11 @@ ${content}`);
3513
3996
  }
3514
3997
  return sections.join("\n\n");
3515
3998
  }
3516
- var ANSWER_SYSTEM_PROMPT = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
3999
+ var ANSWER_SYSTEM_PROMPT_BASE = "You are a knowledge assistant. Answer the question using ONLY the wiki content provided. Cite specific pages using [[Page Title]] wikilinks. If the wiki doesn't contain enough information, say so.";
4000
/**
 * Build the system prompt for the answer call.
 *
 * @returns {string} The base prompt, followed by a space and the language
 *   directive when `languageDirective()` returns a truthy value.
 */
function buildAnswerSystemPrompt() {
  const directive = languageDirective();
  if (!directive) return ANSWER_SYSTEM_PROMPT_BASE;
  return `${ANSWER_SYSTEM_PROMPT_BASE} ${directive}`;
}
3517
4004
  async function callAnswerLLM(question, pagesContent, chunks, onToken) {
3518
4005
  const provenance = chunks.length > 0 ? buildChunkProvenance(chunks) : "";
3519
4006
  const userMessage = `Question: ${question}
@@ -3521,7 +4008,7 @@ async function callAnswerLLM(question, pagesContent, chunks, onToken) {
3521
4008
  Relevant wiki pages:
3522
4009
  ${pagesContent}${provenance}`;
3523
4010
  return callClaude({
3524
- system: ANSWER_SYSTEM_PROMPT,
4011
+ system: buildAnswerSystemPrompt(),
3525
4012
  messages: [{ role: "user", content: userMessage }],
3526
4013
  stream: Boolean(onToken),
3527
4014
  onToken
@@ -3544,7 +4031,7 @@ function summarizeAnswer(answer) {
3544
4031
  }
3545
4032
  async function saveQueryPage(root, question, answer) {
3546
4033
  const slug = slugify(question);
3547
- const filePath = path22.join(root, QUERIES_DIR, `${slug}.md`);
4034
+ const filePath = path27.join(root, QUERIES_DIR, `${slug}.md`);
3548
4035
  const frontmatter = buildFrontmatter({
3549
4036
  title: question,
3550
4037
  summary: summarizeAnswer(answer),
@@ -3570,7 +4057,7 @@ ${answer}
3570
4057
  return slug;
3571
4058
  }
3572
4059
  async function generateAnswer(root, question, options = {}) {
3573
- if (!existsSync8(path22.join(root, INDEX_FILE))) {
4060
+ if (!existsSync8(path27.join(root, INDEX_FILE))) {
3574
4061
  throw new Error("Wiki index not found. Run `llmwiki compile` first.");
3575
4062
  }
3576
4063
  const selection = await selectRelevantPages(root, question, Boolean(options.debug));
@@ -3598,7 +4085,7 @@ function buildEmptyResult(selection) {
3598
4085
  };
3599
4086
  }
3600
4087
  async function queryCommand(root, question, options) {
3601
- if (!existsSync8(path22.join(root, INDEX_FILE))) {
4088
+ if (!existsSync8(path27.join(root, INDEX_FILE))) {
3602
4089
  status("!", error("Wiki index not found. Run `llmwiki compile` first."));
3603
4090
  return;
3604
4091
  }
@@ -3649,10 +4136,10 @@ var DEBUG_CHUNK_PREVIEW_CHARS = 120;
3649
4136
  // src/commands/watch.ts
3650
4137
  import { watch as chokidarWatch } from "chokidar";
3651
4138
  import { existsSync as existsSync9 } from "fs";
3652
- import path23 from "path";
4139
+ import path28 from "path";
3653
4140
  var DEBOUNCE_MS = 500;
3654
4141
  async function watchCommand() {
3655
- const sourcesPath = path23.resolve(SOURCES_DIR);
4142
+ const sourcesPath = path28.resolve(SOURCES_DIR);
3656
4143
  if (!existsSync9(sourcesPath)) {
3657
4144
  status(
3658
4145
  "!",
@@ -3687,7 +4174,7 @@ async function watchCommand() {
3687
4174
  const scheduleCompile = (eventPath, event) => {
3688
4175
  status(
3689
4176
  "~",
3690
- dim(`${event}: ${path23.basename(eventPath)}`)
4177
+ dim(`${event}: ${path28.basename(eventPath)}`)
3691
4178
  );
3692
4179
  if (debounceTimer) clearTimeout(debounceTimer);
3693
4180
  debounceTimer = setTimeout(triggerCompile, DEBOUNCE_MS);
@@ -3771,10 +4258,388 @@ async function lintCommand() {
3771
4258
  }
3772
4259
  }
3773
4260
 
4261
+ // src/commands/export.ts
4262
+ import path30 from "path";
4263
+ import { createRequire } from "module";
4264
+
4265
+ // src/export/collect.ts
4266
+ import { readdir as readdir10, readFile as readFile19 } from "fs/promises";
4267
+ import path29 from "path";
4268
// Matches [[Target]] and [[Target|alias]]; capture group 1 is the link target.
var WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
/**
 * Collect the unique slugs of every [[wikilink]] target in a page body.
 *
 * Iteration goes through `String.prototype.matchAll`, which scans with a
 * private copy of the shared global regex. The previous `exec` loop advanced
 * `WIKILINK_RE.lastIndex` in place, so an exception thrown mid-scan (for
 * example by `slugify`) left a stale offset behind for the next caller.
 *
 * @param {string} body - Markdown body to scan.
 * @returns {string[]} De-duplicated slugs in first-seen order.
 */
function extractWikilinkSlugs(body) {
  const slugs = /* @__PURE__ */ new Set();
  for (const [, target] of body.matchAll(WIKILINK_RE)) {
    slugs.add(slugify(target.trim()));
  }
  return [...slugs];
}
4277
/**
 * Read one wiki page from disk and convert it into an export record.
 *
 * @param {string} filePath - Path of the markdown file to read.
 * @param {string} slug - Slug stored on the record.
 * @param {string} pageDirectory - Directory label stored on the record.
 * @returns {Promise<object|null>} The export record, or null when the file
 *   cannot be read, has no non-empty string `title`, or is marked
 *   `orphaned: true` in its frontmatter.
 */
async function parsePageFile(filePath, slug, pageDirectory) {
  let fileText;
  try {
    fileText = await readFile19(filePath, "utf-8");
  } catch {
    // Unreadable files are skipped instead of failing the whole export.
    return null;
  }
  const { meta, body } = parseFrontmatter(fileText);
  const titleIsUsable = Boolean(meta.title) && typeof meta.title === "string";
  if (!titleIsUsable || meta.orphaned === true) return null;

  // Keep only string entries of an array field; anything else becomes [].
  const onlyStrings = (value) => Array.isArray(value) ? value.filter((item) => typeof item === "string") : [];
  // Missing or non-string timestamps fall back to the current time.
  const timestampOrNow = (value) => typeof value === "string" ? value : (/* @__PURE__ */ new Date()).toISOString();

  // Properties are assigned in the same order as the original literal so the
  // key order of serialized output does not change.
  const record = { title: meta.title, slug, pageDirectory };
  record.summary = typeof meta.summary === "string" ? meta.summary : "";
  record.sources = onlyStrings(meta.sources);
  record.tags = onlyStrings(meta.tags);
  record.createdAt = timestampOrNow(meta.createdAt);
  record.updatedAt = timestampOrNow(meta.updatedAt);
  record.links = extractWikilinkSlugs(body);
  record.body = body;
  return record;
}
4300
/**
 * Parse every `.md` file in a directory into export records.
 *
 * Files are processed one at a time in the order `readdir` returns them;
 * files that `parsePageFile` rejects are left out.
 *
 * @param {string} dirPath - Directory to scan.
 * @param {string} pageDirectory - Directory label passed through to each record.
 * @returns {Promise<object[]>} Parsed pages, or [] when the directory cannot be listed.
 */
async function collectFromDir(dirPath, pageDirectory) {
  let entries;
  try {
    entries = await readdir10(dirPath);
  } catch {
    return [];
  }
  const collected = [];
  for (const fileName of entries) {
    if (!fileName.endsWith(".md")) continue;
    const slug = fileName.slice(0, -".md".length);
    const parsed = await parsePageFile(path29.join(dirPath, fileName), slug, pageDirectory);
    if (parsed) collected.push(parsed);
  }
  return collected;
}
4315
/**
 * Load all exportable pages (concepts and saved queries) under a project root.
 *
 * Both directories are scanned concurrently; the combined list is sorted by
 * title using `localeCompare`.
 *
 * @param {string} root - Project root directory.
 * @returns {Promise<object[]>} Concept pages and query pages, ordered by title.
 */
async function collectExportPages(root) {
  const conceptsDir = path29.join(root, CONCEPTS_DIR);
  const queriesDir = path29.join(root, QUERIES_DIR);
  const pageGroups = await Promise.all([
    collectFromDir(conceptsDir, "concepts"),
    collectFromDir(queriesDir, "queries")
  ]);
  const byTitle = (left, right) => left.title.localeCompare(right.title);
  return pageGroups.flat().sort(byTitle);
}
4326
+
4327
+ // src/export/llms-txt.ts
4328
/**
 * Build the wiki-relative markdown path of a page.
 *
 * @param {{pageDirectory: string, slug: string}} page - Export record.
 * @returns {string} Path of the form `wiki/<pageDirectory>/<slug>.md`.
 */
function pageRelativePath(page) {
  const { pageDirectory, slug } = page;
  return `wiki/${pageDirectory}/${slug}.md`;
}
4331
/**
 * Build the one-line note that follows a page link in llms.txt.
 *
 * Summary, tags and sources are included only when non-empty; the created
 * and updated timestamps are always included. Parts are joined with " | ".
 *
 * @param {object} page - Export record.
 * @returns {string} The joined note.
 */
function buildEntryNote(page) {
  const { summary, tags, sources, createdAt, updatedAt } = page;
  return [
    ...(summary ? [summary] : []),
    ...(tags.length > 0 ? [`tags: ${tags.join(", ")}`] : []),
    ...(sources.length > 0 ? [`sources: ${sources.join(", ")}`] : []),
    `created: ${createdAt}`,
    `updated: ${updatedAt}`
  ].join(" | ");
}
4340
/**
 * Format a page as a markdown list item: `- [title](path): note`.
 *
 * @param {object} page - Export record.
 * @returns {string} The list-item line.
 */
function formatPageEntry(page) {
  const note = buildEntryNote(page);
  const link = `[${page.title}](${pageRelativePath(page)})`;
  return `- ${link}: ${note}`;
}
4344
/**
 * Build the lines of one llms.txt section.
 *
 * @param {string} heading - Section heading text.
 * @param {object[]} pages - Pages listed in the section.
 * @returns {string[]} [] for an empty section; otherwise the heading, a blank
 *   line, one entry per page, and a trailing blank line.
 */
function buildSection(heading, pages) {
  if (pages.length === 0) return [];
  const headingLines = [`## ${heading}`, ""];
  return headingLines.concat(pages.map(formatPageEntry), "");
}
4348
/**
 * Render the llms.txt index: a title, a page-count/export-time line, then a
 * "Concepts" section and a "Saved Queries" section.
 *
 * @param {object[]} pages - All export records.
 * @param {string} projectTitle - Title used for the top-level heading.
 * @returns {string} The document text, lines joined with "\n".
 */
function buildLlmsTxt(pages, projectTitle) {
  const inDirectory = (directory) => pages.filter((page) => page.pageDirectory === directory);
  const conceptPages = inDirectory("concepts");
  const queryPages = inDirectory("queries");
  const exportedAt = (/* @__PURE__ */ new Date()).toISOString();
  const preamble = [
    `# ${projectTitle}`,
    "",
    `> ${pages.length} pages \u2014 exported ${exportedAt}`,
    ""
  ];
  return preamble.concat(
    buildSection("Concepts", conceptPages),
    buildSection("Saved Queries", queryPages)
  ).join("\n");
}
4361
/**
 * Render llms-full.txt: the llms.txt index followed by one full section per
 * page (separator, heading, quoted summary with optional tag/source lines,
 * timestamps, then the trimmed page body).
 *
 * @param {object[]} pages - All export records.
 * @param {string} projectTitle - Title passed to the index builder.
 * @returns {string} The document text, sections joined with "\n".
 */
function buildLlmsFullTxt(pages, projectTitle) {
  const renderPage = (page) => {
    const tagLine = page.tags.length > 0 ? `\nTags: ${page.tags.join(", ")}` : "";
    const sourceLine = page.sources.length > 0 ? `\nSources: ${page.sources.join(", ")}` : "";
    const heading = [
      "---",
      `## ${page.title}`,
      `> ${page.summary}${tagLine}${sourceLine}`,
      `Created: ${page.createdAt} | Updated: ${page.updatedAt}`,
      ""
    ].join("\n");
    return `${heading}\n${page.body.trim()}\n`;
  };
  const index = buildLlmsTxt(pages, projectTitle);
  return [index, ...pages.map((page) => renderPage(page))].join("\n");
}
4381
+
4382
+ // src/export/json-export.ts
4383
/**
 * Serialize the pages as a JSON document with an export timestamp and count.
 *
 * @param {object[]} pages - All export records.
 * @returns {string} JSON text, indented with two spaces, with keys
 *   `exportedAt`, `pageCount`, `pages`.
 */
function buildJsonExport(pages) {
  const exportedAt = (/* @__PURE__ */ new Date()).toISOString();
  return JSON.stringify({ exportedAt, pageCount: pages.length, pages }, null, 2);
}
4391
+
4392
+ // src/export/json-ld.ts
4393
// Prefix for the local IRIs that identify pages in the JSON-LD export.
var LOCAL_BASE = "urn:llmwiki:";
/**
 * Build the IRI of a page from its slug.
 *
 * @param {string} slug - Page slug.
 * @returns {string} `LOCAL_BASE` followed by the slug.
 */
function pageIri(slug) {
  return LOCAL_BASE.concat(slug);
}
4397
/**
 * Convert a page into a JSON-LD "Article" node.
 *
 * `keywords`, `isBasedOn` and `mentions` are added only when the page has
 * tags, sources and links respectively; each link becomes an `{"@id"}` reference.
 *
 * @param {object} page - Export record.
 * @returns {object} The JSON-LD node.
 */
function pageToJsonLd(page) {
  const { slug, title, summary, createdAt, updatedAt, tags, sources, links } = page;
  return {
    "@id": pageIri(slug),
    "@type": "Article",
    name: title,
    description: summary,
    dateCreated: createdAt,
    dateModified: updatedAt,
    // Spreading `false` adds nothing, so empty lists leave the key out.
    ...(tags.length > 0 && { keywords: tags }),
    ...(sources.length > 0 && { isBasedOn: sources }),
    ...(links.length > 0 && { mentions: links.map((target) => ({ "@id": pageIri(target) })) })
  };
}
4417
/**
 * Serialize the pages as a schema.org JSON-LD graph.
 *
 * @param {object[]} pages - All export records.
 * @returns {string} JSON text, indented with two spaces, with `@context` and `@graph`.
 */
function buildJsonLd(pages) {
  const graph = pages.map(pageToJsonLd);
  return JSON.stringify({ "@context": "https://schema.org", "@graph": graph }, null, 2);
}
4424
+
4425
+ // src/export/graphml.ts
4426
// Entity replacements for the five characters with special meaning in XML.
var XML_ESCAPES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;"
};
/**
 * Escape `&`, `<`, `>`, `"` and `'` for use in XML text or attribute values.
 *
 * @param {string} value - Raw text.
 * @returns {string} Text with the five characters replaced by entities.
 */
function escapeXml(value) {
  const toEntity = (ch) => XML_ESCAPES[ch] ?? ch;
  return value.replace(/[&<>"']/g, toEntity);
}
4436
+ var KEY_DEFS = [
4437
+ '<key id="title" for="node" attr.name="title" attr.type="string"/>',
4438
+ '<key id="summary" for="node" attr.name="summary" attr.type="string"/>',
4439
+ '<key id="tags" for="node" attr.name="tags" attr.type="string"/>',
4440
+ '<key id="sources" for="node" attr.name="sources" attr.type="string"/>',
4441
+ '<key id="createdAt" for="node" attr.name="createdAt" attr.type="string"/>',
4442
+ '<key id="updatedAt" for="node" attr.name="updatedAt" attr.type="string"/>'
4443
+ ].join("\n ");
4444
/**
 * Render a page as a GraphML `<node>` element with one `<data>` child per
 * attribute (title, summary, tags, sources, createdAt, updatedAt).
 * Tags and sources are flattened to comma-separated strings; every value is XML-escaped.
 *
 * @param {object} page - Export record.
 * @returns {string} The node markup, lines joined with "\n".
 */
function pageToNode(page) {
  const dataElement = (key, value) => ` <data key="${key}">${escapeXml(value)}</data>`;
  return [
    ` <node id="${escapeXml(page.slug)}">`,
    dataElement("title", page.title),
    dataElement("summary", page.summary),
    dataElement("tags", page.tags.join(", ")),
    dataElement("sources", page.sources.join(", ")),
    dataElement("createdAt", page.createdAt),
    dataElement("updatedAt", page.updatedAt),
    ` </node>`
  ].join("\n");
}
4458
/**
 * Render a page's outgoing links as GraphML `<edge>` elements.
 * Links whose target slug is not in `knownSlugs` are dropped.
 *
 * @param {object} page - Export record (reads `slug` and `links`).
 * @param {Set<string>} knownSlugs - Slugs that exist as nodes in the graph.
 * @returns {string[]} One edge line per retained link, in link order.
 */
function pageToEdges(page, knownSlugs) {
  const edges = [];
  for (const target of page.links) {
    if (!knownSlugs.has(target)) continue;
    edges.push(` <edge source="${escapeXml(page.slug)}" target="${escapeXml(target)}"/>`);
  }
  return edges;
}
4463
/**
 * Render the whole wiki as a directed GraphML document: the XML prologue and
 * key definitions, one node per page, and one edge per link whose target is
 * also an exported page.
 *
 * @param {object[]} pages - All export records.
 * @returns {string} The GraphML text, ending with a newline.
 */
function buildGraphml(pages) {
  const knownSlugs = /* @__PURE__ */ new Set();
  for (const page of pages) knownSlugs.add(page.slug);
  const nodeXml = pages.map(pageToNode).join("\n");
  const edgeXml = pages.flatMap((page) => pageToEdges(page, knownSlugs)).join("\n");
  const prologue = [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<graphml xmlns="http://graphml.graphdrawing.org/graphml"',
    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"',
    ' xsi:schemaLocation="http://graphml.graphdrawing.org/graphml',
    ' http://graphml.graphdrawing.org/graphml/1.0/graphml.xsd">',
    ` ${KEY_DEFS}`
  ];
  const graph = [
    ' <graph id="wiki" edgedefault="directed">',
    nodeXml,
    edgeXml,
    " </graph>"
  ];
  // The final empty element makes the document end with a newline.
  return [...prologue, ...graph, "</graphml>", ""].join("\n");
}
4482
+
4483
+ // src/export/marp.ts
4484
// Maximum number of UTF-16 code units of body text shown on a slide.
var SLIDE_BODY_MAX_CHARS = 300;
/**
 * Extract a short plain-text excerpt from a page body for use on a slide.
 *
 * Takes the first blank-line-delimited block of the trimmed body, strips
 * leading heading markers (`#`..`######`) and list bullets (`-`, `*`, `+`)
 * from each line, and truncates to SLIDE_BODY_MAX_CHARS with a trailing
 * ellipsis when longer.
 *
 * Truncation never splits a UTF-16 surrogate pair: if the cut would land
 * between the two halves of an astral character (e.g. an emoji), that
 * character is dropped entirely rather than leaving a lone surrogate.
 *
 * @param {string} body - Markdown body of the page.
 * @returns {string} The excerpt, possibly ending in "\u2026".
 */
function extractFirstParagraph(body) {
  const trimmed = body.trim();
  const firstBlock = trimmed.split(/\n\s*\n/)[0] ?? "";
  const stripped = firstBlock.replace(/^#{1,6}\s+/gm, "").replace(/^[-*+]\s+/gm, "").trim();
  if (stripped.length <= SLIDE_BODY_MAX_CHARS) return stripped;
  let cut = SLIDE_BODY_MAX_CHARS;
  const lastUnit = stripped.charCodeAt(cut - 1);
  // A high surrogate as the last kept unit means its low half was cut off.
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
  return `${stripped.slice(0, cut)}\u2026`;
}
4492
/**
 * Build the HTML comment appended to a slide, carrying the page's timestamps
 * and, when present, its sources.
 *
 * @param {object} page - Export record.
 * @returns {string} An HTML comment of " | "-separated `key: value` parts.
 */
function buildSpeakerNotes(page) {
  const { createdAt, updatedAt, sources } = page;
  const sourceNote = sources.length > 0 ? [`sources: ${sources.join(", ")}`] : [];
  const note = [`created: ${createdAt}`, `updated: ${updatedAt}`, ...sourceNote].join(" | ");
  return `<!-- ${note} -->`;
}
4497
/**
 * Render one page as the body of a Marp slide: heading, quoted summary
 * (followed by an italic tag line when the page has tags), a body excerpt,
 * and the speaker-notes comment.
 *
 * @param {object} page - Export record.
 * @returns {string} Slide markdown, lines joined with "\n".
 */
function pageToSlide(page) {
  const hasTags = page.tags.length > 0;
  const quote = hasTags ? `> ${page.summary}\n_Tags: ${page.tags.join(", ")}_` : `> ${page.summary}`;
  const slideLines = [
    `## ${page.title}`,
    "",
    quote,
    "",
    extractFirstParagraph(page.body),
    "",
    buildSpeakerNotes(page)
  ];
  return slideLines.join("\n");
}
4512
/**
 * Restrict pages to one page directory.
 *
 * @param {object[]} pages - All export records.
 * @param {string} source2 - "all" or a page directory name.
 * @returns {object[]} The same array for "all"; otherwise a new array of the
 *   pages whose `pageDirectory` equals `source2`.
 */
function filterBySource(pages, source2) {
  return source2 === "all" ? pages : pages.filter(({ pageDirectory }) => pageDirectory === source2);
}
4516
/**
 * Render the wiki as a Marp slide deck: YAML front matter, a title slide,
 * then one slide per page from the selected source.
 *
 * The front-matter title is emitted with `JSON.stringify`, whose output is a
 * valid YAML double-quoted scalar. Titles containing `"` or `\` therefore no
 * longer break the front matter; ordinary titles render exactly as before.
 *
 * @param {object[]} pages - All export records.
 * @param {string} projectTitle - Deck title.
 * @param {string} [source2="all"] - "all" or a page directory to restrict slides to.
 * @returns {string} The deck markdown, parts joined with blank lines.
 */
function buildMarp(pages, projectTitle, source2 = "all") {
  const filtered = filterBySource(pages, source2);
  const frontmatter = [
    "---",
    "marp: true",
    "theme: default",
    "paginate: true",
    // String() keeps the previous rendering for non-string titles.
    `title: ${JSON.stringify(String(projectTitle))}`,
    "---"
  ].join("\n");
  const titleSlide = [
    "",
    `# ${projectTitle}`,
    "",
    `${filtered.length} pages | ${(/* @__PURE__ */ new Date()).toISOString()}`
  ].join("\n");
  const slides = filtered.map((p) => `---\n\n${pageToSlide(p)}`);
  return [frontmatter, titleSlide, ...slides, ""].join("\n\n");
}
4537
+
4538
+ // src/export/types.ts
4539
+ var MARP_SOURCES = ["concepts", "queries", "all"];
4540
+ var EXPORT_TARGETS = [
4541
+ "llms-txt",
4542
+ "llms-full-txt",
4543
+ "json",
4544
+ "json-ld",
4545
+ "graphml",
4546
+ "marp"
4547
+ ];
4548
+
4549
+ // src/commands/export.ts
4550
+ var require2 = createRequire(import.meta.url);
4551
+ var EXPORT_DIR = "dist/exports";
4552
+ var TARGET_FILENAMES = {
4553
+ "llms-txt": "llms.txt",
4554
+ "llms-full-txt": "llms-full.txt",
4555
+ json: "wiki.json",
4556
+ "json-ld": "wiki.jsonld",
4557
+ graphml: "wiki.graphml",
4558
+ marp: "wiki.md"
4559
+ };
4560
/**
 * Determine the project title from `<root>/package.json`.
 *
 * @param {string} root - Project root directory.
 * @returns {string} The manifest's `name` when it is a string; otherwise
 *   "Knowledge Wiki" (also when the manifest cannot be loaded).
 */
function resolveProjectTitle(root) {
  const fallbackTitle = "Knowledge Wiki";
  try {
    // Destructuring stays inside the try so a manifest that is not an object
    // falls back instead of throwing.
    const { name } = require2(path30.join(root, "package.json"));
    if (typeof name === "string") return name;
  } catch {
    // Missing or unreadable manifest: use the fallback below.
  }
  return fallbackTitle;
}
4568
/**
 * Check whether a value is one of the known export targets.
 *
 * @param {unknown} value - Candidate target name.
 * @returns {boolean} True when `value` is listed in EXPORT_TARGETS.
 */
function isValidTarget(value) {
  return EXPORT_TARGETS.some((known) => known === value);
}
4571
/**
 * Check whether a value is one of the accepted Marp `--source` values.
 *
 * @param {unknown} value - Candidate source name.
 * @returns {boolean} True when `value` is listed in MARP_SOURCES.
 */
function isValidMarpSource(value) {
  return MARP_SOURCES.some((known) => known === value);
}
4574
/**
 * Normalize the raw `--source` option for the Marp export.
 *
 * @param {string} [rawSource] - Value supplied by the caller, if any.
 * @returns {string} "all" when no value is given; otherwise the validated value.
 * @throws {Error} When the value is not one of MARP_SOURCES.
 */
function resolveMarpSource(rawSource) {
  if (!rawSource) return "all";
  if (isValidMarpSource(rawSource)) return rawSource;
  throw new Error(
    `Unknown --source value "${rawSource}". Valid values: ${MARP_SOURCES.join(", ")}`
  );
}
4583
/**
 * Produce the file content for one export target.
 *
 * @param {string} target - Export target name.
 * @param {object[]} pages - All export records.
 * @param {string} projectTitle - Title used by the text and slide exporters.
 * @param {string} marpSource - Source filter forwarded to the Marp exporter.
 * @returns {string|undefined} The rendered content, or undefined for an unknown target.
 */
function buildContent(target, pages, projectTitle, marpSource) {
  // Thunks keep the lookup lazy: only the selected exporter runs.
  const builders = /* @__PURE__ */ new Map([
    ["llms-txt", () => buildLlmsTxt(pages, projectTitle)],
    ["llms-full-txt", () => buildLlmsFullTxt(pages, projectTitle)],
    ["json", () => buildJsonExport(pages)],
    ["json-ld", () => buildJsonLd(pages)],
    ["graphml", () => buildGraphml(pages)],
    ["marp", () => buildMarp(pages, projectTitle, marpSource)]
  ]);
  const build = builders.get(target);
  return build ? build() : void 0;
}
4599
/**
 * Compute the page count reported after an export run.
 *
 * When the only target is "marp" and it is restricted to one source, the
 * count covers just that source's pages; in every other case it is the total.
 *
 * @param {object[]} pages - All export records.
 * @param {string[]} targets - Targets written in this run.
 * @param {string} marpSource - "all" or the page directory used for Marp.
 * @returns {number} Number of pages to report.
 */
function computeReportedPageCount(pages, targets, marpSource) {
  const marpOnly = targets.length === 1 && targets[0] === "marp";
  if (!marpOnly || marpSource === "all") return pages.length;
  let matching = 0;
  pages.forEach((page) => {
    if (page.pageDirectory === marpSource) matching += 1;
  });
  return matching;
}
4606
/**
 * Export the wiki to every requested target under `<root>/dist/exports`.
 *
 * Pages are collected first, then the target and Marp source options are
 * validated; each target's content is written with `atomicWrite` and
 * reported via `status`, one target at a time.
 *
 * @param {string} root - Project root directory.
 * @param {{target?: string, source?: string}} [options] - Optional target and Marp source.
 * @returns {Promise<{written: string[], pageCount: number}>} Paths written and the page count to report.
 */
async function runExport(root, options = {}) {
  const pages = await collectExportPages(root);
  const projectTitle = resolveProjectTitle(root);
  const targets = resolveTargets(options.target);
  const marpSource = resolveMarpSource(options.source);
  const written = [];
  for (const target of targets) {
    const content = buildContent(target, pages, projectTitle, marpSource);
    const fileName = TARGET_FILENAMES[target];
    const outPath = path30.join(root, EXPORT_DIR, fileName);
    await atomicWrite(outPath, content);
    written.push(outPath);
    status("+", success(`Exported ${target} \u2192 ${source(outPath)}`));
  }
  const pageCount = computeReportedPageCount(pages, targets, marpSource);
  return { written, pageCount };
}
4621
/**
 * Turn the raw `--target` option into the list of targets to export.
 *
 * @param {string} [rawTarget] - Value supplied by the caller, if any.
 * @returns {string[]} A copy of all export targets when no value is given;
 *   otherwise a one-element list holding the validated target.
 * @throws {Error} When the value is not one of EXPORT_TARGETS.
 */
function resolveTargets(rawTarget) {
  if (!rawTarget) return Array.from(EXPORT_TARGETS);
  if (isValidTarget(rawTarget)) return [rawTarget];
  throw new Error(
    `Unknown export target "${rawTarget}". Valid targets: ${EXPORT_TARGETS.join(", ")}`
  );
}
4630
/**
 * CLI entry point for the export command: prints a header, runs the export,
 * and prints a summary of pages exported and files written.
 *
 * @param {string} root - Project root directory.
 * @param {object} options - Options forwarded to `runExport`.
 * @returns {Promise<void>}
 */
async function exportCommand(root, options) {
  header("Exporting wiki");
  const result = await runExport(root, options);
  const summary = `Done \u2014 ${result.pageCount} pages exported to ${result.written.length} file(s).`;
  status("\u2713", success(summary));
}
4638
+
3774
4639
  // src/commands/schema.ts
3775
4640
  import { existsSync as existsSync10 } from "fs";
3776
4641
  import { mkdir as mkdir6, writeFile as writeFile5 } from "fs/promises";
3777
- import path24 from "path";
4642
+ import path31 from "path";
3778
4643
  async function schemaInitCommand() {
3779
4644
  const root = process.cwd();
3780
4645
  const defaults = buildDefaultSchema();
@@ -3783,7 +4648,7 @@ async function schemaInitCommand() {
3783
4648
  status("!", warn(`Schema file already exists at ${targetPath}`));
3784
4649
  return;
3785
4650
  }
3786
- await mkdir6(path24.dirname(targetPath), { recursive: true });
4651
+ await mkdir6(path31.dirname(targetPath), { recursive: true });
3787
4652
  const serializable = {
3788
4653
  version: defaults.version,
3789
4654
  defaultKind: defaults.defaultKind,
@@ -3839,10 +4704,17 @@ async function reviewShowCommand(id) {
3839
4704
  status("!", warn(`[${v.severity}] ${v.message}`));
3840
4705
  }
3841
4706
  }
4707
+ if (candidate.provenanceViolations && candidate.provenanceViolations.length > 0) {
4708
+ console.log();
4709
+ header("Provenance violations");
4710
+ for (const v of candidate.provenanceViolations) {
4711
+ status("!", warn(`[${v.severity}] ${v.message}`));
4712
+ }
4713
+ }
3842
4714
  }
3843
4715
 
3844
4716
  // src/commands/review-approve.ts
3845
- import path25 from "path";
4717
+ import path32 from "path";
3846
4718
 
3847
4719
  // src/commands/review-helpers.ts
3848
4720
  async function runReviewUnderLock(id, underLock) {
@@ -3874,7 +4746,7 @@ async function approveUnderLock(root, id) {
3874
4746
  process.exitCode = 1;
3875
4747
  return;
3876
4748
  }
3877
- const pagePath = path25.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
4749
+ const pagePath = path32.join(root, CONCEPTS_DIR, `${candidate.slug}.md`);
3878
4750
  await atomicWrite(pagePath, candidate.body);
3879
4751
  status("+", success(`Approved \u2192 ${source(pagePath)}`));
3880
4752
  await persistCandidateSourceStates(root, candidate);
@@ -3934,7 +4806,7 @@ import { McpServer as McpServer2 } from "@modelcontextprotocol/sdk/server/mcp.js
3934
4806
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3935
4807
 
3936
4808
  // src/mcp/tools.ts
3937
- import path26 from "path";
4809
+ import path33 from "path";
3938
4810
  import { z } from "zod";
3939
4811
 
3940
4812
  // src/mcp/provider-check.ts
@@ -4070,7 +4942,7 @@ async function pickSearchSlugs(root, question) {
4070
4942
  if (candidates.length > 0) return candidates.map((c) => c.slug);
4071
4943
  } catch {
4072
4944
  }
4073
- const indexContent = await safeReadFile(path26.join(root, INDEX_FILE));
4945
+ const indexContent = await safeReadFile(path33.join(root, INDEX_FILE));
4074
4946
  const { pages } = await selectPages(question, indexContent);
4075
4947
  return pages;
4076
4948
  }
@@ -4129,8 +5001,8 @@ function registerStatusTool(server, root) {
4129
5001
  );
4130
5002
  }
4131
5003
  async function collectStatus(root) {
4132
- const concepts = await collectPageSummaries(path26.join(root, CONCEPTS_DIR));
4133
- const queries = await collectPageSummaries(path26.join(root, QUERIES_DIR));
5004
+ const concepts = await collectPageSummaries(path33.join(root, CONCEPTS_DIR));
5005
+ const queries = await collectPageSummaries(path33.join(root, QUERIES_DIR));
4134
5006
  const state = await readState(root);
4135
5007
  const changes = await detectChanges(root, state);
4136
5008
  const orphans = await findOrphanedSlugs(root);
@@ -4147,7 +5019,7 @@ async function collectStatus(root) {
4147
5019
  };
4148
5020
  }
4149
5021
  async function findOrphanedSlugs(root) {
4150
- const scanned = await scanWikiPages(path26.join(root, CONCEPTS_DIR));
5022
+ const scanned = await scanWikiPages(path33.join(root, CONCEPTS_DIR));
4151
5023
  return scanned.filter(({ meta }) => meta.orphaned).map(({ slug }) => slug);
4152
5024
  }
4153
5025
  async function loadPageRecords(root, slugs) {
@@ -4160,7 +5032,7 @@ async function loadPageRecords(root, slugs) {
4160
5032
  }
4161
5033
  async function readPage(root, slug) {
4162
5034
  for (const dir of PAGE_DIRS2) {
4163
- const content = await safeReadFile(path26.join(root, dir, `${slug}.md`));
5035
+ const content = await safeReadFile(path33.join(root, dir, `${slug}.md`));
4164
5036
  if (!content) continue;
4165
5037
  const { meta, body } = parseFrontmatter(content);
4166
5038
  if (meta.orphaned) continue;
@@ -4175,8 +5047,8 @@ async function readPage(root, slug) {
4175
5047
  }
4176
5048
 
4177
5049
  // src/mcp/resources.ts
4178
- import path27 from "path";
4179
- import { readdir as readdir9 } from "fs/promises";
5050
+ import path34 from "path";
5051
+ import { readdir as readdir11 } from "fs/promises";
4180
5052
  import { ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
4181
5053
  function jsonContent(uri, payload) {
4182
5054
  return {
@@ -4209,7 +5081,7 @@ function registerIndexResource(server, root) {
4209
5081
  mimeType: "text/markdown"
4210
5082
  },
4211
5083
  async (uri) => {
4212
- const content = await safeReadFile(path27.join(root, INDEX_FILE));
5084
+ const content = await safeReadFile(path34.join(root, INDEX_FILE));
4213
5085
  return { contents: [markdownContent(uri, content)] };
4214
5086
  }
4215
5087
  );
@@ -4276,23 +5148,23 @@ function registerQueryResource(server, root) {
4276
5148
  );
4277
5149
  }
4278
5150
  async function listSources(root) {
4279
- const sourcesPath = path27.join(root, SOURCES_DIR);
5151
+ const sourcesPath = path34.join(root, SOURCES_DIR);
4280
5152
  let files;
4281
5153
  try {
4282
- files = await readdir9(sourcesPath);
5154
+ files = await readdir11(sourcesPath);
4283
5155
  } catch {
4284
5156
  return [];
4285
5157
  }
4286
5158
  const records = [];
4287
5159
  for (const file of files.filter((f) => f.endsWith(".md"))) {
4288
- const content = await safeReadFile(path27.join(sourcesPath, file));
5160
+ const content = await safeReadFile(path34.join(sourcesPath, file));
4289
5161
  const { meta } = parseFrontmatter(content);
4290
5162
  records.push({ filename: file, ...meta });
4291
5163
  }
4292
5164
  return records;
4293
5165
  }
4294
5166
  async function loadPageWithMeta(root, dir, slug) {
4295
- const filePath = path27.join(root, dir, `${slug}.md`);
5167
+ const filePath = path34.join(root, dir, `${slug}.md`);
4296
5168
  const content = await safeReadFile(filePath);
4297
5169
  if (!content) {
4298
5170
  throw new Error(`Page not found: ${dir}/${slug}.md`);
@@ -4301,10 +5173,10 @@ async function loadPageWithMeta(root, dir, slug) {
4301
5173
  return { slug, meta, body: body.trim() };
4302
5174
  }
4303
5175
  async function listPagesUnder(root, dir, scheme) {
4304
- const pagesPath = path27.join(root, dir);
5176
+ const pagesPath = path34.join(root, dir);
4305
5177
  let files;
4306
5178
  try {
4307
- files = await readdir9(pagesPath);
5179
+ files = await readdir11(pagesPath);
4308
5180
  } catch {
4309
5181
  return { resources: [] };
4310
5182
  }
@@ -4328,8 +5200,8 @@ async function startMCPServer(options) {
4328
5200
  }
4329
5201
 
4330
5202
  // src/cli.ts
4331
- var require2 = createRequire(import.meta.url);
4332
- var { version } = require2("../package.json");
5203
+ var require3 = createRequire2(import.meta.url);
5204
+ var { version } = require3("../package.json");
4333
5205
  var program = new Command();
4334
5206
  program.name("llmwiki").description("The knowledge compiler \u2014 raw sources in, interlinked wiki out").version(version);
4335
5207
  program.command("ingest <source>").description("Ingest a URL or local file into sources/").action(async (source2) => {
@@ -4340,11 +5212,23 @@ program.command("ingest <source>").description("Ingest a URL or local file into
4340
5212
  process.exit(1);
4341
5213
  }
4342
5214
  });
5215
+ program.command("ingest-session <path>").description("Ingest a coding-agent session export (Claude, Codex, Cursor) into sources/").action(async (targetPath) => {
5216
+ try {
5217
+ await ingestSession(targetPath);
5218
+ } catch (err) {
5219
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
5220
+ process.exit(1);
5221
+ }
5222
+ });
4343
5223
  program.command("compile").description("Compile sources/ into an interlinked wiki").option(
4344
5224
  "--review",
4345
5225
  "Write generated pages as review candidates under .llmwiki/candidates/ instead of mutating wiki/. Orphan-marking for deleted sources is deferred until the next non-review compile."
5226
+ ).option(
5227
+ "--lang <code>",
5228
+ 'Target language for generated wiki content (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
4346
5229
  ).action(async (options) => {
4347
5230
  try {
5231
+ applyLanguageOption(options.lang);
4348
5232
  requireProvider();
4349
5233
  await compileCommand({ review: options.review });
4350
5234
  } catch (err) {
@@ -4385,15 +5269,21 @@ reviewCommand.command("reject <id>").description("Reject a candidate and archive
4385
5269
  process.exit(1);
4386
5270
  }
4387
5271
  });
4388
- program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").action(async (question, options) => {
4389
- try {
4390
- requireProvider();
4391
- await queryCommand(process.cwd(), question, options);
4392
- } catch (err) {
4393
- console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
4394
- process.exit(1);
5272
+ program.command("query <question>").description("Ask a question against the wiki").option("--save", "Save the answer as a wiki page").option("--debug", "Print which pages and chunks were selected and their scores").option(
5273
+ "--lang <code>",
5274
+ 'Target language for the answer (e.g. "Chinese", "ja", "zh-CN"). Equivalent to setting LLMWIKI_OUTPUT_LANG.'
5275
+ ).action(
5276
+ async (question, options) => {
5277
+ try {
5278
+ applyLanguageOption(options.lang);
5279
+ requireProvider();
5280
+ await queryCommand(process.cwd(), question, options);
5281
+ } catch (err) {
5282
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
5283
+ process.exit(1);
5284
+ }
4395
5285
  }
4396
- });
5286
+ );
4397
5287
  program.command("watch").description("Watch sources/ and auto-recompile on changes").action(async () => {
4398
5288
  try {
4399
5289
  requireProvider();
@@ -4428,6 +5318,17 @@ schemaCmd.command("show").description("Print the resolved schema for this projec
4428
5318
  process.exit(1);
4429
5319
  }
4430
5320
  });
5321
+ program.command("export").description("Export wiki content to portable formats (llms.txt, JSON, GraphML, Marp, \u2026)").option("--target <name>", "Limit export to a single target format").option(
5322
+ "--source <kind>",
5323
+ "For marp target: which pages to include \u2014 concepts, queries, or all (default: all)"
5324
+ ).action(async (options) => {
5325
+ try {
5326
+ await exportCommand(process.cwd(), options);
5327
+ } catch (err) {
5328
+ console.error(`\x1B[31mError:\x1B[0m ${err instanceof Error ? err.message : err}`);
5329
+ process.exit(1);
5330
+ }
5331
+ });
4431
5332
  program.command("serve").description("Start an MCP server exposing wiki tools and resources over stdio").option("--root <dir>", "Project root directory", process.cwd()).action(async (options) => {
4432
5333
  try {
4433
5334
  await startMCPServer({ root: options.root, version });
@@ -4436,6 +5337,11 @@ program.command("serve").description("Start an MCP server exposing wiki tools an
4436
5337
  process.exit(1);
4437
5338
  }
4438
5339
  });
5340
+ function applyLanguageOption(lang) {
5341
+ if (lang && lang.trim().length > 0) {
5342
+ process.env.LLMWIKI_OUTPUT_LANG = lang.trim();
5343
+ }
5344
+ }
4439
5345
  var PROVIDER_KEY_VARS2 = {
4440
5346
  anthropic: "ANTHROPIC_API_KEY",
4441
5347
  openai: "OPENAI_API_KEY",