@swarmvaultai/engine 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -21,7 +21,7 @@ import {
21
21
  uniqueBy,
22
22
  writeFileIfChanged,
23
23
  writeJsonFile
24
- } from "./chunk-QMW7OISM.js";
24
+ } from "./chunk-6UPHDGEB.js";
25
25
 
26
26
  // src/agents.ts
27
27
  import fs from "fs/promises";
@@ -593,9 +593,10 @@ async function uninstallGitHooks(rootDir) {
593
593
  }
594
594
 
595
595
  // src/ingest.ts
596
- import fs8 from "fs/promises";
597
- import path8 from "path";
596
+ import fs9 from "fs/promises";
597
+ import path9 from "path";
598
598
  import { Readability } from "@mozilla/readability";
599
+ import matter3 from "gray-matter";
599
600
  import ignore from "ignore";
600
601
  import { JSDOM } from "jsdom";
601
602
  import mime from "mime-types";
@@ -2728,9 +2729,10 @@ async function analyzeCodeSource(manifest, extractedText, schemaHash) {
2728
2729
  const language = manifest.language ?? inferCodeLanguage(manifest.originalPath ?? manifest.storedPath, manifest.mimeType) ?? "typescript";
2729
2730
  const { code, rationales } = language === "javascript" || language === "jsx" || language === "typescript" || language === "tsx" ? analyzeTypeScriptLikeCode(manifest, extractedText) : await analyzeTreeSitterCode(manifest, extractedText, language);
2730
2731
  return {
2731
- analysisVersion: 4,
2732
+ analysisVersion: 5,
2732
2733
  sourceId: manifest.sourceId,
2733
2734
  sourceHash: manifest.contentHash,
2735
+ extractionHash: manifest.extractionHash,
2734
2736
  schemaHash,
2735
2737
  title: manifest.title,
2736
2738
  summary: summarizeModule(manifest, code),
@@ -2744,19 +2746,247 @@ async function analyzeCodeSource(manifest, extractedText, schemaHash) {
2744
2746
  };
2745
2747
  }
2746
2748
 
2747
- // src/logs.ts
2749
+ // src/extraction.ts
2748
2750
  import fs6 from "fs/promises";
2751
+ import os from "os";
2749
2752
  import path6 from "path";
2753
+ import { z } from "zod";
2754
/**
 * Zod schema for the structured payload requested from a vision provider for
 * a single image: optional title, required summary, transcribed text, and
 * bounded lists of concepts, entities, claims and follow-up questions.
 *
 * `var` is kept to match the surrounding bundled output.
 */
var imageVisionExtractionSchema = (() => {
  // Factories (not shared instances) so every field owns its own schema object,
  // exactly as in a fully inlined definition.
  const namedItem = () =>
    z.object({
      name: z.string().min(1),
      description: z.string().default("")
    });
  const claimItem = () =>
    z.object({
      text: z.string().min(1),
      confidence: z.number().min(0).max(1).default(0.65),
      polarity: z.enum(["positive", "negative", "neutral"]).default("neutral")
    });

  return z.object({
    title: z.string().min(1).nullable().optional(),
    summary: z.string().min(1),
    text: z.string().default(""),
    concepts: z.array(namedItem()).max(12).default([]),
    entities: z.array(namedItem()).max(12).default([]),
    claims: z.array(claimItem()).max(8).default([]),
    questions: z.array(z.string().min(1)).max(6).default([])
  });
})();
2779
/**
 * Build the base metadata record that every extraction artifact starts from.
 *
 * @param {string} sourceKind - Kind of source being extracted (e.g. "image", "pdf").
 * @param {string} mimeType - MIME type of the source payload.
 * @param {string} extractor - Identifier of the extractor that produced the artifact.
 * @returns {{extractor: string, sourceKind: string, mimeType: string, producedAt: string}}
 *   Metadata stamped with the current time as an ISO-8601 string.
 */
function extractionMetadata(sourceKind, mimeType, extractor) {
  const producedAt = new Date().toISOString();
  // Property order is deliberate: extractor, sourceKind, mimeType, producedAt.
  return { extractor, sourceKind, mimeType, producedAt };
}
2787
/**
 * Hash the outcome of an extraction so changes in extracted text or artifact
 * can be detected.
 *
 * The artifact's `producedAt` is blanked before hashing, so two extractions
 * that differ only in when they ran produce the same hash.
 *
 * @param {string|undefined|null} extractedText - Extracted text, if any.
 * @param {object|undefined|null} artifact - Extraction artifact, if any.
 * @returns {string|undefined} `sha256` of the serialized pair, or `undefined`
 *   when there is neither text nor artifact.
 */
function buildExtractionHash(extractedText, artifact) {
  const nothingToHash = !extractedText && !artifact;
  if (nothingToHash) {
    return undefined;
  }

  let normalizedArtifact = null;
  if (artifact) {
    // JSON.stringify omits undefined properties, which removes the timestamp.
    normalizedArtifact = { ...artifact, producedAt: undefined };
  }

  const serialized = JSON.stringify({
    extractedText: extractedText ?? null,
    artifact: normalizedArtifact
  });
  return sha256(serialized);
}
2802
/**
 * Create the extraction artifact for a source handled as plain text.
 *
 * @param {string} sourceKind - Kind of the source.
 * @param {string} mimeType - MIME type of the source.
 * @returns {object} Extraction metadata tagged with the "plain_text" extractor.
 */
function createPlainTextExtractionArtifact(sourceKind, mimeType) {
  const extractor = "plain_text";
  return extractionMetadata(sourceKind, mimeType, extractor);
}
2805
/**
 * Create the extraction artifact for a source handled by the HTML readability path.
 *
 * @param {string} sourceKind - Kind of the source.
 * @param {string} mimeType - MIME type of the source.
 * @returns {object} Extraction metadata tagged with the "html_readability" extractor.
 */
function createHtmlReadabilityExtractionArtifact(sourceKind, mimeType) {
  const extractor = "html_readability";
  return extractionMetadata(sourceKind, mimeType, extractor);
}
2808
/**
 * Flatten a parsed vision payload into markdown text.
 *
 * Sections appear in a fixed order (summary, transcribed text, claims as a
 * bullet list), separated by blank lines; empty sections are left out.
 *
 * @param {{summary: string, text: string, claims: Array<{text: string}>}} payload
 * @returns {string} The combined, trimmed markdown (possibly empty).
 */
function normalizeVisionMarkdown(payload) {
  const summarySection = payload.summary.trim();
  const textSection = payload.text.trim();
  // An empty claims array joins to "", which the filter below drops.
  const claimsSection = payload.claims.map((claim) => `- ${claim.text}`).join("\n");

  return [summarySection, textSection, claimsSection]
    .filter((section) => section.length > 0)
    .join("\n\n")
    .trim();
}
2821
/**
 * Make sure an image attachment exists on disk and return its path together
 * with a cleanup callback.
 *
 * - If `input.filePath` is set, it is returned as-is and `cleanup` does nothing.
 * - Otherwise `input.bytes` are written to `source.<ext>` inside a fresh
 *   temporary directory; `cleanup` removes that directory.
 *
 * @param {{filePath?: string, bytes?: Uint8Array|Buffer, mimeType?: string}} input
 * @returns {Promise<{filePath: string, cleanup: () => Promise<void>}>}
 * @throws {Error} When neither `filePath` nor `bytes` is provided, or when the
 *   temporary file cannot be written.
 */
async function materializeAttachmentPath(input) {
  if (input.filePath) {
    return {
      filePath: input.filePath,
      cleanup: async () => {}
    };
  }
  if (!input.bytes) {
    throw new Error("Image extraction requires a file path or bytes.");
  }

  // Derive a filesystem-safe extension from the MIME subtype: drop parameters
  // ("; charset=..."), drop structured suffixes ("+xml"), and fall back to
  // "bin" when the subtype is missing, empty, or contains unexpected characters.
  const subtype = String(input.mimeType ?? "").split(";")[0].split("/")[1]?.split("+")[0].trim() ?? "";
  const extension = /^[A-Za-z0-9][A-Za-z0-9.-]*$/.test(subtype) ? subtype : "bin";

  const tempDir = await fs6.mkdtemp(path6.join(os.tmpdir(), "swarmvault-image-extract-"));
  const removeTempDir = async () => {
    await fs6.rm(tempDir, { recursive: true, force: true });
  };

  const tempPath = path6.join(tempDir, `source.${extension}`);
  try {
    await fs6.writeFile(tempPath, input.bytes);
  } catch (error) {
    // The caller never receives `cleanup` on this path, so remove the directory
    // here. Removal is best-effort so the original write error is the one reported.
    await removeTempDir().catch(() => {});
    throw error;
  }

  return {
    filePath: tempPath,
    cleanup: removeTempDir
  };
}
2843
/**
 * Run structured vision extraction on one image using the provider configured
 * for the "visionProvider" task.
 *
 * Outcomes visible in this function:
 * - Provider lookup fails, or the provider is heuristic / lacks the "vision"
 *   or "structured" capability: returns `{ artifact }` carrying a warning.
 * - The provider call succeeds: returns `{ title, extractedText, artifact }`,
 *   where the artifact embeds the parsed vision payload.
 * - The provider call throws: returns `{ artifact }` with a truncated failure warning.
 *
 * The attachment obtained from `materializeAttachmentPath` is always cleaned
 * up after the provider call.
 *
 * @param {string} rootDir - Workspace root passed to the provider lookup.
 * @param {{title: string, mimeType: string, filePath?: string, bytes?: Uint8Array}} input
 * @returns {Promise<{title?: string, extractedText?: string, artifact: object}>}
 */
async function extractImageWithVision(rootDir, input) {
  // Built on demand so each artifact gets its own producedAt timestamp.
  const baseArtifact = () => extractionMetadata("image", input.mimeType, "image_vision");

  let provider;
  try {
    provider = await getProviderForTask(rootDir, "visionProvider");
  } catch (error) {
    const reason = error instanceof Error ? error.message : "provider not configured";
    return {
      artifact: {
        ...baseArtifact(),
        warnings: [`Vision extraction unavailable: ${reason}`]
      }
    };
  }

  const supportsStructuredVision =
    provider.type !== "heuristic" && provider.capabilities.has("vision") && provider.capabilities.has("structured");
  if (!supportsStructuredVision) {
    return {
      artifact: {
        ...baseArtifact(),
        warnings: [`Vision extraction unavailable for provider ${provider.id}. Configure a structured multimodal provider.`]
      }
    };
  }

  const attachment = await materializeAttachmentPath(input);
  try {
    const systemPrompt = [
      "You extract grounded notes from a single image for a local-first knowledge vault.",
      "Only describe content that is actually visible.",
      "If the image contains text, transcribe it accurately.",
      "If the image is a diagram or screenshot, summarize the key visible relationships and labels without speculation."
    ].join("\n");
    const userPrompt = [
      `Source title: ${input.title}`,
      "Return structured extraction for this image.",
      "Include a concise summary, OCR-style text, grounded concepts/entities, visible claims, and follow-up questions."
    ].join("\n");
    const request = {
      system: systemPrompt,
      prompt: userPrompt,
      attachments: [{ mimeType: input.mimeType, filePath: attachment.filePath }]
    };

    const parsed = await provider.generateStructured(request, imageVisionExtractionSchema);
    // The schema allows a null title; expose it as undefined instead.
    const title = parsed.title ?? undefined;

    // Property order matters: artifacts are later serialized with JSON.stringify.
    const artifact = {
      ...baseArtifact(),
      providerId: provider.id,
      providerModel: provider.model,
      vision: {
        title,
        summary: parsed.summary,
        text: parsed.text,
        concepts: parsed.concepts,
        entities: parsed.entities,
        claims: parsed.claims,
        questions: parsed.questions
      }
    };
    return {
      title,
      extractedText: normalizeVisionMarkdown(parsed),
      artifact
    };
  } catch (error) {
    const reason = error instanceof Error ? truncate(error.message, 240) : "unknown error";
    return {
      artifact: {
        ...baseArtifact(),
        providerId: provider.id,
        providerModel: provider.model,
        warnings: [`Vision extraction failed: ${reason}`]
      }
    };
  } finally {
    await attachment.cleanup();
  }
}
2914
/**
 * Reduce a raw PDF metadata object to its non-empty string fields.
 *
 * Non-string values are ignored; string values are passed through
 * `normalizeWhitespace` and dropped if the result is empty.
 *
 * @param {unknown} raw - Raw metadata value (expected to be an object).
 * @returns {Record<string, string>|undefined} Cleaned fields, or `undefined`
 *   when `raw` is not an object or no field survives.
 */
function normalizePdfMetadata(raw) {
  const isObject = Boolean(raw) && typeof raw === "object";
  if (!isObject) {
    return undefined;
  }

  const metadata = {};
  for (const [key, value] of Object.entries(raw)) {
    if (typeof value !== "string") {
      continue;
    }
    const cleaned = normalizeWhitespace(value);
    if (!cleaned) {
      continue;
    }
    metadata[key] = cleaned;
  }

  if (Object.keys(metadata).length === 0) {
    return undefined;
  }
  return metadata;
}
2929
/**
 * Extract the text layer of a PDF with pdf.js (legacy build, loaded lazily).
 *
 * Each page's text items are joined with spaces and whitespace-normalized;
 * non-empty pages are joined with blank lines. Failures are reported as a
 * warning on the returned artifact instead of being thrown.
 *
 * @param {{bytes: Uint8Array|Buffer, mimeType: string}} input
 * @returns {Promise<{extractedText?: string, artifact: object}>}
 *   On success the artifact carries `pageCount` and cleaned document metadata;
 *   `extractedText` is `undefined` when no text could be extracted.
 */
async function extractPdfText(input) {
  let task;
  try {
    const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
    task = pdfjs.getDocument({
      data: new Uint8Array(input.bytes),
      useWorkerFetch: false,
      isEvalSupported: false,
      disableFontFace: true,
      verbosity: 0
    });
    const document = await task.promise;
    // Read before the loading task is destroyed in `finally`.
    const pageCount = document.numPages;

    const pageTexts = [];
    for (let pageNumber = 1; pageNumber <= pageCount; pageNumber += 1) {
      const page = await document.getPage(pageNumber);
      const textContent = await page.getTextContent();
      const pageText = normalizeWhitespace(
        textContent.items
          // Items without a string `str` contribute an empty string.
          .map((item) => (typeof item === "object" && item && "str" in item && typeof item.str === "string" ? item.str : ""))
          .join(" ")
      );
      if (pageText) {
        pageTexts.push(pageText);
      }
      page.cleanup();
    }

    // Metadata is optional: a failure here must not discard the extracted text.
    const metadataResult = await document.getMetadata().catch(() => null);

    const extractedText = pageTexts.join("\n\n").trim();
    const artifact = {
      ...extractionMetadata("pdf", input.mimeType, "pdf_text"),
      pageCount,
      metadata: normalizePdfMetadata(metadataResult?.info)
    };
    if (!extractedText) {
      artifact.warnings = ["PDF text extraction completed but produced no extractable text."];
    }
    return {
      extractedText: extractedText || undefined,
      artifact
    };
  } catch (error) {
    return {
      artifact: {
        ...extractionMetadata("pdf", input.mimeType, "pdf_text"),
        warnings: [`PDF text extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  } finally {
    // Destroy the loading task on every path; it used to leak when a page
    // failed to load or parse. Teardown is best-effort so it cannot turn a
    // finished extraction (or the failure artifact) into a thrown error.
    if (task) {
      try {
        await task.destroy();
      } catch {
        // Nothing useful to report: the result has already been decided.
      }
    }
  }
}
2976
+
2977
+ // src/logs.ts
2978
+ import fs7 from "fs/promises";
2979
+ import path7 from "path";
2750
2980
  import matter from "gray-matter";
2751
2981
  async function resolveUniqueSessionPath(rootDir, operation, title, startedAt) {
2752
2982
  const { paths } = await initWorkspace(rootDir);
2753
2983
  await ensureDir(paths.sessionsDir);
2754
2984
  const timestamp = startedAt.replace(/[:.]/g, "-");
2755
2985
  const baseName = `${timestamp}-${operation}-${slugify(title)}`;
2756
- let candidate = path6.join(paths.sessionsDir, `${baseName}.md`);
2986
+ let candidate = path7.join(paths.sessionsDir, `${baseName}.md`);
2757
2987
  let counter = 2;
2758
2988
  while (await fileExists(candidate)) {
2759
- candidate = path6.join(paths.sessionsDir, `${baseName}-${counter}.md`);
2989
+ candidate = path7.join(paths.sessionsDir, `${baseName}-${counter}.md`);
2760
2990
  counter++;
2761
2991
  }
2762
2992
  return candidate;
@@ -2764,11 +2994,11 @@ async function resolveUniqueSessionPath(rootDir, operation, title, startedAt) {
2764
2994
  async function appendLogEntry(rootDir, action, title, lines = []) {
2765
2995
  const { paths } = await initWorkspace(rootDir);
2766
2996
  await ensureDir(paths.wikiDir);
2767
- const logPath = path6.join(paths.wikiDir, "log.md");
2997
+ const logPath = path7.join(paths.wikiDir, "log.md");
2768
2998
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().slice(0, 19).replace("T", " ");
2769
2999
  const entry = [`## [${timestamp}] ${action} | ${title}`, ...lines.map((line) => `- ${line}`), ""].join("\n");
2770
- const existing = await fileExists(logPath) ? await fs6.readFile(logPath, "utf8") : "# Log\n\n";
2771
- await fs6.writeFile(logPath, `${existing}${entry}
3000
+ const existing = await fileExists(logPath) ? await fs7.readFile(logPath, "utf8") : "# Log\n\n";
3001
+ await fs7.writeFile(logPath, `${existing}${entry}
2772
3002
  `, "utf8");
2773
3003
  }
2774
3004
  async function recordSession(rootDir, input) {
@@ -2778,8 +3008,8 @@ async function recordSession(rootDir, input) {
2778
3008
  const finishedAtIso = new Date(input.finishedAt ?? input.startedAt).toISOString();
2779
3009
  const durationMs = Math.max(0, new Date(finishedAtIso).getTime() - new Date(startedAtIso).getTime());
2780
3010
  const sessionPath = await resolveUniqueSessionPath(rootDir, input.operation, input.title, startedAtIso);
2781
- const sessionId = path6.basename(sessionPath, ".md");
2782
- const relativeSessionPath = path6.relative(rootDir, sessionPath).split(path6.sep).join(path6.posix.sep);
3011
+ const sessionId = path7.basename(sessionPath, ".md");
3012
+ const relativeSessionPath = path7.relative(rootDir, sessionPath).split(path7.sep).join(path7.posix.sep);
2783
3013
  const frontmatter = Object.fromEntries(
2784
3014
  Object.entries({
2785
3015
  session_id: sessionId,
@@ -2827,7 +3057,7 @@ async function recordSession(rootDir, input) {
2827
3057
  frontmatter
2828
3058
  );
2829
3059
  await writeFileIfChanged(sessionPath, content);
2830
- const logPath = path6.join(paths.wikiDir, "log.md");
3060
+ const logPath = path7.join(paths.wikiDir, "log.md");
2831
3061
  const timestamp = startedAtIso.slice(0, 19).replace("T", " ");
2832
3062
  const entry = [
2833
3063
  `## [${timestamp}] ${input.operation} | ${input.title}`,
@@ -2835,8 +3065,8 @@ async function recordSession(rootDir, input) {
2835
3065
  ...(input.lines ?? []).map((line) => `- ${line}`),
2836
3066
  ""
2837
3067
  ].join("\n");
2838
- const existing = await fileExists(logPath) ? await fs6.readFile(logPath, "utf8") : "# Log\n\n";
2839
- await fs6.writeFile(logPath, `${existing}${entry}
3068
+ const existing = await fileExists(logPath) ? await fs7.readFile(logPath, "utf8") : "# Log\n\n";
3069
+ await fs7.writeFile(logPath, `${existing}${entry}
2840
3070
  `, "utf8");
2841
3071
  return { sessionPath, sessionId };
2842
3072
  }
@@ -2846,8 +3076,8 @@ async function appendWatchRun(rootDir, run) {
2846
3076
  }
2847
3077
 
2848
3078
  // src/watch-state.ts
2849
- import fs7 from "fs/promises";
2850
- import path7 from "path";
3079
+ import fs8 from "fs/promises";
3080
+ import path8 from "path";
2851
3081
  import matter2 from "gray-matter";
2852
3082
  function pendingEntryKey(entry) {
2853
3083
  return entry.path;
@@ -2861,7 +3091,7 @@ function normalizeRelativePath(rootDir, filePath) {
2861
3091
  if (!filePath) {
2862
3092
  return void 0;
2863
3093
  }
2864
- return toPosix(path7.relative(rootDir, path7.resolve(filePath)));
3094
+ return toPosix(path8.relative(rootDir, path8.resolve(filePath)));
2865
3095
  }
2866
3096
  async function readPendingSemanticRefresh(rootDir) {
2867
3097
  const { paths } = await initWorkspace(rootDir);
@@ -2955,11 +3185,11 @@ async function markPagesStaleForSources(rootDir, sourceIds) {
2955
3185
  if (page.freshness !== "stale" || !page.sourceIds.some((sourceId) => affectedSourceIds.has(sourceId))) {
2956
3186
  continue;
2957
3187
  }
2958
- const absolutePath = path7.join(paths.wikiDir, page.path);
3188
+ const absolutePath = path8.join(paths.wikiDir, page.path);
2959
3189
  if (!await fileExists(absolutePath)) {
2960
3190
  continue;
2961
3191
  }
2962
- const raw = await fs7.readFile(absolutePath, "utf8");
3192
+ const raw = await fs8.readFile(absolutePath, "utf8");
2963
3193
  const parsed = matter2(raw);
2964
3194
  if (parsed.data.freshness === "stale") {
2965
3195
  continue;
@@ -2975,6 +3205,9 @@ async function markPagesStaleForSources(rootDir, sourceIds) {
2975
3205
  var DEFAULT_MAX_ASSET_SIZE = 10 * 1024 * 1024;
2976
3206
  var DEFAULT_MAX_DIRECTORY_FILES = 5e3;
2977
3207
  var BUILT_IN_REPO_IGNORES = /* @__PURE__ */ new Set([".git", "node_modules", "dist", "build", ".next", "coverage", ".venv", "vendor", "target"]);
3208
/**
 * Drop falsy entries and duplicates, keeping first-occurrence order.
 *
 * @param {Array<string|undefined|null>} values
 * @returns {string[]} The distinct truthy values.
 */
function uniqueStrings(values) {
  const seen = new Set();
  for (const value of values) {
    if (value) {
      seen.add(value);
    }
  }
  return Array.from(seen);
}
2978
3211
  function inferKind(mimeType, filePath) {
2979
3212
  if (inferCodeLanguage(filePath, mimeType)) {
2980
3213
  return "code";
@@ -3007,7 +3240,7 @@ function normalizeIngestOptions(options) {
3007
3240
  return {
3008
3241
  includeAssets: options?.includeAssets ?? true,
3009
3242
  maxAssetSize: Math.max(0, Math.floor(options?.maxAssetSize ?? DEFAULT_MAX_ASSET_SIZE)),
3010
- repoRoot: options?.repoRoot ? path8.resolve(options.repoRoot) : void 0,
3243
+ repoRoot: options?.repoRoot ? path9.resolve(options.repoRoot) : void 0,
3011
3244
  include: (options?.include ?? []).map((pattern) => pattern.trim()).filter(Boolean),
3012
3245
  exclude: (options?.exclude ?? []).map((pattern) => pattern.trim()).filter(Boolean),
3013
3246
  maxFiles: Math.max(1, Math.floor(options?.maxFiles ?? DEFAULT_MAX_DIRECTORY_FILES)),
@@ -3016,27 +3249,27 @@ function normalizeIngestOptions(options) {
3016
3249
  }
3017
3250
  function matchesAnyGlob(relativePath, patterns) {
3018
3251
  return patterns.some(
3019
- (pattern) => path8.matchesGlob(relativePath, pattern) || path8.matchesGlob(path8.posix.basename(relativePath), pattern)
3252
+ (pattern) => path9.matchesGlob(relativePath, pattern) || path9.matchesGlob(path9.posix.basename(relativePath), pattern)
3020
3253
  );
3021
3254
  }
3022
3255
  function supportedDirectoryKind(sourceKind) {
3023
3256
  return sourceKind !== "binary";
3024
3257
  }
3025
3258
  async function findNearestGitRoot2(startPath) {
3026
- let current = path8.resolve(startPath);
3259
+ let current = path9.resolve(startPath);
3027
3260
  try {
3028
- const stat = await fs8.stat(current);
3261
+ const stat = await fs9.stat(current);
3029
3262
  if (!stat.isDirectory()) {
3030
- current = path8.dirname(current);
3263
+ current = path9.dirname(current);
3031
3264
  }
3032
3265
  } catch {
3033
- current = path8.dirname(current);
3266
+ current = path9.dirname(current);
3034
3267
  }
3035
3268
  while (true) {
3036
- if (await fileExists(path8.join(current, ".git"))) {
3269
+ if (await fileExists(path9.join(current, ".git"))) {
3037
3270
  return current;
3038
3271
  }
3039
- const parent = path8.dirname(current);
3272
+ const parent = path9.dirname(current);
3040
3273
  if (parent === current) {
3041
3274
  return null;
3042
3275
  }
@@ -3044,26 +3277,26 @@ async function findNearestGitRoot2(startPath) {
3044
3277
  }
3045
3278
  }
3046
3279
  function withinRoot(rootPath, targetPath) {
3047
- const relative = path8.relative(rootPath, targetPath);
3048
- return relative === "" || !relative.startsWith("..") && !path8.isAbsolute(relative);
3280
+ const relative = path9.relative(rootPath, targetPath);
3281
+ return relative === "" || !relative.startsWith("..") && !path9.isAbsolute(relative);
3049
3282
  }
3050
3283
  function repoRootFromManifest(manifest) {
3051
3284
  if (manifest.originType !== "file" || !manifest.originalPath || !manifest.repoRelativePath) {
3052
3285
  return null;
3053
3286
  }
3054
- const repoDir = path8.posix.dirname(manifest.repoRelativePath);
3055
- const fileDir = path8.dirname(path8.resolve(manifest.originalPath));
3287
+ const repoDir = path9.posix.dirname(manifest.repoRelativePath);
3288
+ const fileDir = path9.dirname(path9.resolve(manifest.originalPath));
3056
3289
  if (repoDir === "." || !repoDir) {
3057
3290
  return fileDir;
3058
3291
  }
3059
3292
  const segments = repoDir.split("/").filter(Boolean);
3060
- return path8.resolve(fileDir, ...segments.map(() => ".."));
3293
+ return path9.resolve(fileDir, ...segments.map(() => ".."));
3061
3294
  }
3062
3295
  function repoRelativePathFor(absolutePath, repoRoot) {
3063
3296
  if (!repoRoot || !withinRoot(repoRoot, absolutePath)) {
3064
3297
  return void 0;
3065
3298
  }
3066
- const relative = toPosix(path8.relative(repoRoot, absolutePath));
3299
+ const relative = toPosix(path9.relative(repoRoot, absolutePath));
3067
3300
  return relative && !relative.startsWith("..") ? relative : void 0;
3068
3301
  }
3069
3302
  function normalizeOriginUrl(input) {
@@ -3092,6 +3325,22 @@ function arxivIdFromInput(input) {
3092
3325
  return null;
3093
3326
  }
3094
3327
  }
3328
/**
 * Extract a DOI from user input.
 *
 * Accepts either a bare DOI ("10.<registrant>/<suffix>", no whitespace) or a
 * URL on `doi.org` / `dx.doi.org` whose percent-decoded path is such a DOI.
 *
 * @param {string} input - Raw user input.
 * @returns {string|null} The DOI, or `null` when none can be determined.
 */
function doiFromInput(input) {
  const doiPattern = /^10\.\S+\/\S+$/i;
  const candidate = input.trim();

  if (doiPattern.test(candidate)) {
    return candidate.replace(/\s+/g, "");
  }

  let url;
  try {
    url = new URL(candidate);
  } catch {
    // Not a bare DOI and not a parseable URL.
    return null;
  }

  const isDoiResolver = url.hostname === "doi.org" || url.hostname === "dx.doi.org";
  if (!isDoiResolver) {
    return null;
  }

  let doi;
  try {
    doi = decodeURIComponent(url.pathname.replace(/^\/+/, ""));
  } catch {
    // Malformed percent-encoding in the path.
    return null;
  }
  return doiPattern.test(doi) ? doi : null;
}
3095
3344
  function isTweetUrl(input) {
3096
3345
  try {
3097
3346
  const url = new URL(input);
@@ -3101,26 +3350,25 @@ function isTweetUrl(input) {
3101
3350
  }
3102
3351
  }
3103
3352
  function markdownFrontmatter(value) {
3104
- const lines = ["---"];
3105
- for (const [key, rawValue] of Object.entries(value)) {
3106
- if (!rawValue) {
3107
- continue;
3108
- }
3109
- lines.push(`${key}: "${rawValue.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`);
3110
- }
3111
- lines.push("---", "");
3112
- return lines;
3353
+ const normalized = Object.fromEntries(
3354
+ Object.entries(value).filter(
3355
+ ([, rawValue]) => Array.isArray(rawValue) ? rawValue.length > 0 : Boolean(typeof rawValue === "string" ? rawValue.trim() : rawValue)
3356
+ )
3357
+ );
3358
+ return matter3.stringify("", normalized).trimEnd().split("\n").concat([""]);
3113
3359
  }
3114
3360
  function prepareCapturedMarkdownInput(input) {
3115
3361
  return {
3116
3362
  title: input.title,
3117
3363
  originType: "url",
3118
3364
  sourceKind: "markdown",
3365
+ sourceType: input.sourceType,
3119
3366
  url: normalizeOriginUrl(input.url),
3120
3367
  mimeType: "text/markdown",
3121
3368
  storedExtension: ".md",
3122
3369
  payloadBytes: Buffer.from(input.markdown, "utf8"),
3123
3370
  extractedText: input.markdown,
3371
+ attachments: input.attachments,
3124
3372
  logDetails: input.logDetails
3125
3373
  };
3126
3374
  }
@@ -3131,6 +3379,17 @@ async function fetchText(url) {
3131
3379
  }
3132
3380
  return response.text();
3133
3381
  }
3382
/**
 * Fetch a URL and return its body along with where it resolved to.
 *
 * @param {string} url - URL to fetch.
 * @returns {Promise<{text: string, finalUrl: string, contentType: string}>}
 *   `finalUrl` is the normalized response URL (falling back to the requested
 *   URL); `contentType` is the media type without parameters, defaulting to
 *   "text/html" when the header is missing or empty.
 * @throws {Error} When the response status is not OK.
 */
async function fetchResolvedText(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
  }

  const text = await response.text();
  const finalUrl = normalizeOriginUrl(response.url || url);
  const contentTypeHeader = response.headers.get("content-type");
  const contentType = contentTypeHeader?.split(";")[0]?.trim() || "text/html";

  return { text, finalUrl, contentType };
}
3134
3393
  function domTextFromHtml(html, baseUrl) {
3135
3394
  const dom = new JSDOM(`<body>${html}</body>`, { url: baseUrl });
3136
3395
  return normalizeWhitespace(dom.window.document.body.textContent ?? "");
@@ -3150,11 +3409,16 @@ async function captureArxivMarkdown(input, options) {
3150
3409
  const authors = [...document.querySelectorAll('meta[name="citation_author"]')].map((node) => node.getAttribute("content")?.trim()).filter((value) => Boolean(value));
3151
3410
  const authorsText = authors.join(", ") || stripLeadingLabel(document.querySelector(".authors")?.textContent?.trim() ?? "", "Authors:");
3152
3411
  const abstract = stripLeadingLabel(document.querySelector("blockquote.abstract")?.textContent?.trim() ?? "", "Abstract:");
3412
+ const categories = [...document.querySelectorAll(".subheader .primary-subject, .metatable .tablecell.subjects")].flatMap((node) => (node.textContent ?? "").split(/;/g)).map((value) => value.trim()).filter(Boolean);
3153
3413
  const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
3154
3414
  const markdown = [
3155
3415
  ...markdownFrontmatter({
3156
- capture_type: "arxiv",
3416
+ source_type: "arxiv",
3157
3417
  source_url: normalizedUrl,
3418
+ canonical_url: normalizedUrl,
3419
+ title,
3420
+ authors,
3421
+ tags: uniqueStrings(categories),
3158
3422
  arxiv_id: arxivId,
3159
3423
  author: options.author,
3160
3424
  contributor: options.contributor,
@@ -3194,8 +3458,11 @@ async function captureTweetMarkdown(input, options) {
3194
3458
  const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
3195
3459
  const markdown = [
3196
3460
  ...markdownFrontmatter({
3197
- capture_type: "tweet",
3461
+ source_type: "tweet",
3198
3462
  source_url: normalizedUrl,
3463
+ canonical_url: canonicalUrl,
3464
+ title,
3465
+ authors: postAuthor ? [postAuthor] : void 0,
3199
3466
  author: options.author,
3200
3467
  contributor: options.contributor,
3201
3468
  captured_at: capturedAt
@@ -3217,6 +3484,101 @@ async function captureTweetMarkdown(input, options) {
3217
3484
  ].join("\n");
3218
3485
  return { title, normalizedUrl, markdown };
3219
3486
  }
3487
/**
 * Return the trimmed `content` attribute of the first selector that matches
 * an element with a non-empty value.
 *
 * @param {Document} document - DOM document to query.
 * @param {string[]} selectors - CSS selectors, tried in order.
 * @returns {string|undefined} The first non-empty content, or `undefined`.
 */
function firstMetaContent(document, selectors) {
  for (let index = 0; index < selectors.length; index += 1) {
    const node = document.querySelector(selectors[index]);
    const content = node?.getAttribute("content")?.trim();
    if (content) {
      return content;
    }
  }
  return undefined;
}
3496
/**
 * Collect the trimmed, non-empty `content` attributes of every element
 * matching any of the selectors, de-duplicated in encounter order.
 *
 * @param {Document} document - DOM document to query.
 * @param {string[]} selectors - CSS selectors, processed in order.
 * @returns {string[]} Distinct content values.
 */
function metaContents(document, selectors) {
  const collected = [];
  for (const selector of selectors) {
    for (const node of document.querySelectorAll(selector)) {
      const content = node.getAttribute("content")?.trim() ?? "";
      if (content) {
        collected.push(content);
      }
    }
  }
  return uniqueStrings(collected);
}
3503
/**
 * Split a keyword string on commas and semicolons into distinct, trimmed,
 * non-empty keywords.
 *
 * @param {string|undefined|null} value - Raw keyword list; nullish counts as empty.
 * @returns {string[]} Distinct keywords in first-occurrence order.
 */
function splitKeywords(value) {
  const rawParts = (value ?? "").split(/[;,]/g);
  const keywords = rawParts.map((item) => item.trim()).filter(Boolean);
  return uniqueStrings(keywords);
}
3508
/**
 * Capture a web article as markdown with bibliographic frontmatter.
 *
 * Steps: fetch the page, require an HTML content type, read canonical URL,
 * title, authors, dates, tags and DOI from the page's <link>/<meta> tags,
 * obtain the body via `prepareUrlInput` on the canonical URL, then assemble
 * frontmatter, body and a "## Source" footer.
 *
 * @param {string} rootDir - Workspace root forwarded to `prepareUrlInput`.
 * @param {string} input - URL to capture.
 * @param {object} options - Ingest options; `author` and `contributor` are
 *   copied into the frontmatter.
 * @param {{sourceType: string, sourceUrl?: string, doi?: string}} [extra]
 *   Overrides for the recorded source type, source URL and DOI.
 * @returns {Promise<{title: string, normalizedUrl: string, markdown: string, attachments: unknown}>}
 * @throws {Error} When the response is not HTML or the prepared input is
 *   neither markdown nor text.
 */
async function captureArticleMarkdown(rootDir, input, options, extra = { sourceType: "article" }) {
  const resolved = await fetchResolvedText(input);
  if (!resolved.contentType.includes("html")) {
    throw new Error(`Unsupported article content type: ${resolved.contentType}`);
  }

  const { document } = new JSDOM(resolved.text, { url: resolved.finalUrl }).window;

  // Prefer the page's declared canonical link (resolved against the final URL).
  const canonicalHref = document.querySelector('link[rel="canonical"]')?.getAttribute("href")?.trim();
  let canonicalUrl = resolved.finalUrl;
  if (canonicalHref) {
    canonicalUrl = normalizeOriginUrl(new URL(canonicalHref, resolved.finalUrl).toString());
  }

  const metaTitle = firstMetaContent(document, [
    'meta[name="citation_title"]',
    'meta[property="og:title"]',
    'meta[name="twitter:title"]'
  ]);
  const title = metaTitle ?? (document.title.trim() || canonicalUrl);

  const citationAuthors = metaContents(document, ['meta[name="citation_author"]']);
  const genericAuthors = metaContents(document, ['meta[name="author"]', 'meta[property="article:author"]']);
  const authors = uniqueStrings([...citationAuthors, ...genericAuthors]);

  const publishedAt = firstMetaContent(document, [
    'meta[name="citation_publication_date"]',
    'meta[name="citation_online_date"]',
    'meta[property="article:published_time"]',
    'meta[name="pubdate"]'
  ]);
  const updatedAt = firstMetaContent(document, ['meta[property="article:modified_time"]', 'meta[name="lastmod"]']);

  const articleTags = metaContents(document, ['meta[property="article:tag"]']);
  const keywordTags = splitKeywords(firstMetaContent(document, ['meta[name="keywords"]']));
  const tags = uniqueStrings([...articleTags, ...keywordTags]);

  // An explicit DOI wins; otherwise fall back to page metadata, minus any "doi:" prefix.
  const readMetaDoi = () =>
    firstMetaContent(document, ['meta[name="citation_doi"]', 'meta[name="dc.identifier"]'])?.replace(/^doi:\s*/i, "");
  const inferredDoi = extra.doi ?? readMetaDoi() ?? undefined;

  const normalizedOptions = normalizeIngestOptions(options);
  const prepared = await prepareUrlInput(rootDir, canonicalUrl, normalizedOptions);
  const isTextual = prepared.sourceKind === "markdown" || prepared.sourceKind === "text";
  if (!isTextual) {
    throw new Error(`Unsupported prepared article kind: ${prepared.sourceKind}`);
  }
  const body = prepared.extractedText ?? prepared.payloadBytes.toString("utf8");

  const capturedAt = new Date().toISOString();
  const frontmatterLines = markdownFrontmatter({
    source_type: extra.sourceType,
    source_url: extra.sourceUrl ?? input,
    canonical_url: canonicalUrl,
    title,
    authors,
    published_at: publishedAt,
    updated_at: updatedAt,
    doi: inferredDoi,
    tags,
    author: options.author,
    contributor: options.contributor,
    captured_at: capturedAt
  });

  // The DOI footer line is only emitted for DOI captures.
  const doiLines = extra.sourceType === "doi" && inferredDoi ? [`- DOI: ${inferredDoi}`] : [];
  const markdown = [
    ...frontmatterLines,
    body.trim(),
    "",
    "## Source",
    "",
    `- URL: ${canonicalUrl}`,
    ...doiLines,
    ""
  ].join("\n");

  return {
    title,
    normalizedUrl: canonicalUrl,
    markdown,
    attachments: prepared.attachments
  };
}
3571
/**
 * Capture the article behind a DOI as markdown.
 *
 * The DOI is extracted from `input`, turned into a `https://doi.org/` URL,
 * and handed to `captureArticleMarkdown` with source type "doi".
 *
 * @param {string} rootDir - Workspace root forwarded to the article capture.
 * @param {string} input - Bare DOI or DOI resolver URL.
 * @param {object} options - Ingest options forwarded to the article capture.
 * @returns {Promise<object>} Whatever `captureArticleMarkdown` returns.
 * @throws {Error} When no DOI can be determined from `input`.
 */
async function captureDoiMarkdown(rootDir, input, options) {
  const doi = doiFromInput(input);
  if (!doi) {
    throw new Error(`Could not determine a DOI from ${input}`);
  }

  const resolverUrl = `https://doi.org/${encodeURIComponent(doi)}`;
  const extra = { sourceType: "doi", sourceUrl: input, doi };
  return captureArticleMarkdown(rootDir, resolverUrl, options, extra);
}
3220
3582
  function manifestMatchesOrigin(manifest, prepared) {
3221
3583
  if (prepared.originType === "url") {
3222
3584
  return Boolean(prepared.url && manifest.url && normalizeOriginUrl(manifest.url) === normalizeOriginUrl(prepared.url));
@@ -3231,7 +3593,7 @@ function buildCompositeHash(payloadBytes, attachments = []) {
3231
3593
  return sha256(`${sha256(payloadBytes)}|${attachmentSignature}`);
3232
3594
  }
3233
3595
  function sanitizeAssetRelativePath(value) {
3234
- const normalized = path8.posix.normalize(value.replace(/\\/g, "/"));
3596
+ const normalized = path9.posix.normalize(value.replace(/\\/g, "/"));
3235
3597
  const segments = normalized.split("/").filter(Boolean).map((segment) => {
3236
3598
  if (segment === ".") {
3237
3599
  return "";
@@ -3251,7 +3613,7 @@ function normalizeLocalReference(value) {
3251
3613
  return null;
3252
3614
  }
3253
3615
  const lowered = candidate.toLowerCase();
3254
- if (lowered.startsWith("http://") || lowered.startsWith("https://") || lowered.startsWith("data:") || lowered.startsWith("mailto:") || lowered.startsWith("#") || path8.isAbsolute(candidate)) {
3616
+ if (lowered.startsWith("http://") || lowered.startsWith("https://") || lowered.startsWith("data:") || lowered.startsWith("mailto:") || lowered.startsWith("#") || path9.isAbsolute(candidate)) {
3255
3617
  return null;
3256
3618
  }
3257
3619
  return candidate.replace(/\\/g, "/");
@@ -3313,12 +3675,12 @@ async function convertHtmlToMarkdown(html, url) {
3313
3675
  };
3314
3676
  }
3315
3677
  async function readManifestByHash(manifestsDir, contentHash) {
3316
- const entries = await fs8.readdir(manifestsDir, { withFileTypes: true }).catch(() => []);
3678
+ const entries = await fs9.readdir(manifestsDir, { withFileTypes: true }).catch(() => []);
3317
3679
  for (const entry of entries) {
3318
3680
  if (!entry.isFile() || !entry.name.endsWith(".json")) {
3319
3681
  continue;
3320
3682
  }
3321
- const manifest = await readJsonFile(path8.join(manifestsDir, entry.name));
3683
+ const manifest = await readJsonFile(path9.join(manifestsDir, entry.name));
3322
3684
  if (manifest?.contentHash === contentHash) {
3323
3685
  return manifest;
3324
3686
  }
@@ -3326,12 +3688,12 @@ async function readManifestByHash(manifestsDir, contentHash) {
3326
3688
  return null;
3327
3689
  }
3328
3690
  async function readManifestByOrigin(manifestsDir, prepared) {
3329
- const entries = await fs8.readdir(manifestsDir, { withFileTypes: true }).catch(() => []);
3691
+ const entries = await fs9.readdir(manifestsDir, { withFileTypes: true }).catch(() => []);
3330
3692
  for (const entry of entries) {
3331
3693
  if (!entry.isFile() || !entry.name.endsWith(".json")) {
3332
3694
  continue;
3333
3695
  }
3334
- const manifest = await readJsonFile(path8.join(manifestsDir, entry.name));
3696
+ const manifest = await readJsonFile(path9.join(manifestsDir, entry.name));
3335
3697
  if (manifest && manifestMatchesOrigin(manifest, prepared)) {
3336
3698
  return manifest;
3337
3699
  }
@@ -3342,12 +3704,12 @@ async function loadGitignoreMatcher(repoRoot, enabled) {
3342
3704
  if (!enabled) {
3343
3705
  return null;
3344
3706
  }
3345
- const gitignorePath = path8.join(repoRoot, ".gitignore");
3707
+ const gitignorePath = path9.join(repoRoot, ".gitignore");
3346
3708
  if (!await fileExists(gitignorePath)) {
3347
3709
  return null;
3348
3710
  }
3349
3711
  const matcher = ignore();
3350
- matcher.add(await fs8.readFile(gitignorePath, "utf8"));
3712
+ matcher.add(await fs9.readFile(gitignorePath, "utf8"));
3351
3713
  return matcher;
3352
3714
  }
3353
3715
  function builtInIgnoreReason(relativePath) {
@@ -3368,23 +3730,23 @@ async function collectDirectoryFiles(rootDir, inputDir, repoRoot, options) {
3368
3730
  if (!currentDir) {
3369
3731
  continue;
3370
3732
  }
3371
- const entries = await fs8.readdir(currentDir, { withFileTypes: true });
3733
+ const entries = await fs9.readdir(currentDir, { withFileTypes: true });
3372
3734
  entries.sort((left, right) => left.name.localeCompare(right.name));
3373
3735
  for (const entry of entries) {
3374
- const absolutePath = path8.join(currentDir, entry.name);
3375
- const relativeToRepo = repoRelativePathFor(absolutePath, repoRoot) ?? toPosix(path8.relative(inputDir, absolutePath));
3736
+ const absolutePath = path9.join(currentDir, entry.name);
3737
+ const relativeToRepo = repoRelativePathFor(absolutePath, repoRoot) ?? toPosix(path9.relative(inputDir, absolutePath));
3376
3738
  const relativePath = relativeToRepo || entry.name;
3377
3739
  const builtInReason = builtInIgnoreReason(relativePath);
3378
3740
  if (builtInReason) {
3379
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: builtInReason });
3741
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: builtInReason });
3380
3742
  continue;
3381
3743
  }
3382
3744
  if (matcher?.ignores(relativePath)) {
3383
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: "gitignore" });
3745
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "gitignore" });
3384
3746
  continue;
3385
3747
  }
3386
3748
  if (matchesAnyGlob(relativePath, options.exclude)) {
3387
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: "exclude_glob" });
3749
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "exclude_glob" });
3388
3750
  continue;
3389
3751
  }
3390
3752
  if (entry.isDirectory()) {
@@ -3392,21 +3754,21 @@ async function collectDirectoryFiles(rootDir, inputDir, repoRoot, options) {
3392
3754
  continue;
3393
3755
  }
3394
3756
  if (!entry.isFile()) {
3395
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: "unsupported_entry" });
3757
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "unsupported_entry" });
3396
3758
  continue;
3397
3759
  }
3398
3760
  if (options.include.length > 0 && !matchesAnyGlob(relativePath, options.include)) {
3399
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: "include_glob" });
3761
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "include_glob" });
3400
3762
  continue;
3401
3763
  }
3402
3764
  const mimeType = guessMimeType(absolutePath);
3403
3765
  const sourceKind = inferKind(mimeType, absolutePath);
3404
3766
  if (!supportedDirectoryKind(sourceKind)) {
3405
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
3767
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
3406
3768
  continue;
3407
3769
  }
3408
3770
  if (files.length >= options.maxFiles) {
3409
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: "max_files" });
3771
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "max_files" });
3410
3772
  continue;
3411
3773
  }
3412
3774
  files.push(absolutePath);
@@ -3428,12 +3790,12 @@ function resolveUrlMimeType(input, response) {
3428
3790
  function buildRemoteAssetRelativePath(assetUrl, mimeType) {
3429
3791
  const url = new URL(assetUrl);
3430
3792
  const normalized = sanitizeAssetRelativePath(`${url.hostname}${url.pathname || "/asset"}`);
3431
- const extension = path8.posix.extname(normalized);
3432
- const directory = path8.posix.dirname(normalized);
3433
- const basename = extension ? path8.posix.basename(normalized, extension) : path8.posix.basename(normalized);
3793
+ const extension = path9.posix.extname(normalized);
3794
+ const directory = path9.posix.dirname(normalized);
3795
+ const basename = extension ? path9.posix.basename(normalized, extension) : path9.posix.basename(normalized);
3434
3796
  const resolvedExtension = extension || `.${mime.extension(mimeType) || "bin"}`;
3435
3797
  const hashedName = `${basename || "asset"}-${sha256(assetUrl).slice(0, 8)}${resolvedExtension}`;
3436
- return directory === "." ? hashedName : path8.posix.join(directory, hashedName);
3798
+ return directory === "." ? hashedName : path9.posix.join(directory, hashedName);
3437
3799
  }
3438
3800
  async function readResponseBytesWithinLimit(response, maxBytes) {
3439
3801
  const contentLength = Number.parseInt(response.headers.get("content-length") ?? "", 10);
@@ -3557,9 +3919,10 @@ async function persistPreparedInput(rootDir, prepared, paths) {
3557
3919
  await ensureDir(paths.extractsDir);
3558
3920
  const attachments = prepared.attachments ?? [];
3559
3921
  const contentHash = prepared.contentHash ?? buildCompositeHash(prepared.payloadBytes, attachments);
3922
+ const extractionHash = prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact);
3560
3923
  const existingByOrigin = await readManifestByOrigin(paths.manifestsDir, prepared);
3561
3924
  const existingByHash = existingByOrigin ? null : await readManifestByHash(paths.manifestsDir, contentHash);
3562
- if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
3925
+ if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.sourceType === prepared.sourceType && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
3563
3926
  return { manifest: existingByOrigin, isNew: false, wasUpdated: false };
3564
3927
  }
3565
3928
  if (existingByHash) {
@@ -3568,27 +3931,34 @@ async function persistPreparedInput(rootDir, prepared, paths) {
3568
3931
  const previous = existingByOrigin ?? void 0;
3569
3932
  const sourceId = previous?.sourceId ?? `${slugify(prepared.title)}-${contentHash.slice(0, 8)}`;
3570
3933
  const now = (/* @__PURE__ */ new Date()).toISOString();
3571
- const storedPath = path8.join(paths.rawSourcesDir, `${sourceId}${prepared.storedExtension}`);
3572
- const extractedTextPath = prepared.extractedText ? path8.join(paths.extractsDir, `${sourceId}.md`) : void 0;
3573
- const attachmentsDir = path8.join(paths.rawAssetsDir, sourceId);
3934
+ const storedPath = path9.join(paths.rawSourcesDir, `${sourceId}${prepared.storedExtension}`);
3935
+ const extractedTextPath = prepared.extractedText ? path9.join(paths.extractsDir, `${sourceId}.md`) : void 0;
3936
+ const extractedMetadataPath = prepared.extractionArtifact ? path9.join(paths.extractsDir, `${sourceId}.json`) : void 0;
3937
+ const attachmentsDir = path9.join(paths.rawAssetsDir, sourceId);
3574
3938
  if (previous?.storedPath) {
3575
- await fs8.rm(path8.resolve(rootDir, previous.storedPath), { force: true });
3939
+ await fs9.rm(path9.resolve(rootDir, previous.storedPath), { force: true });
3576
3940
  }
3577
3941
  if (previous?.extractedTextPath) {
3578
- await fs8.rm(path8.resolve(rootDir, previous.extractedTextPath), { force: true });
3942
+ await fs9.rm(path9.resolve(rootDir, previous.extractedTextPath), { force: true });
3943
+ }
3944
+ if (previous?.extractedMetadataPath) {
3945
+ await fs9.rm(path9.resolve(rootDir, previous.extractedMetadataPath), { force: true });
3579
3946
  }
3580
- await fs8.rm(attachmentsDir, { recursive: true, force: true });
3581
- await fs8.writeFile(storedPath, prepared.payloadBytes);
3947
+ await fs9.rm(attachmentsDir, { recursive: true, force: true });
3948
+ await fs9.writeFile(storedPath, prepared.payloadBytes);
3582
3949
  if (prepared.extractedText && extractedTextPath) {
3583
- await fs8.writeFile(extractedTextPath, prepared.extractedText, "utf8");
3950
+ await fs9.writeFile(extractedTextPath, prepared.extractedText, "utf8");
3951
+ }
3952
+ if (prepared.extractionArtifact && extractedMetadataPath) {
3953
+ await writeJsonFile(extractedMetadataPath, prepared.extractionArtifact);
3584
3954
  }
3585
3955
  const manifestAttachments = [];
3586
3956
  for (const attachment of attachments) {
3587
- const absoluteAttachmentPath = path8.join(attachmentsDir, attachment.relativePath);
3588
- await ensureDir(path8.dirname(absoluteAttachmentPath));
3589
- await fs8.writeFile(absoluteAttachmentPath, attachment.bytes);
3957
+ const absoluteAttachmentPath = path9.join(attachmentsDir, attachment.relativePath);
3958
+ await ensureDir(path9.dirname(absoluteAttachmentPath));
3959
+ await fs9.writeFile(absoluteAttachmentPath, attachment.bytes);
3590
3960
  manifestAttachments.push({
3591
- path: toPosix(path8.relative(rootDir, absoluteAttachmentPath)),
3961
+ path: toPosix(path9.relative(rootDir, absoluteAttachmentPath)),
3592
3962
  mimeType: attachment.mimeType,
3593
3963
  originalPath: attachment.originalPath
3594
3964
  });
@@ -3598,19 +3968,22 @@ async function persistPreparedInput(rootDir, prepared, paths) {
3598
3968
  title: prepared.title,
3599
3969
  originType: prepared.originType,
3600
3970
  sourceKind: prepared.sourceKind,
3971
+ sourceType: prepared.sourceType,
3601
3972
  language: prepared.language,
3602
3973
  originalPath: prepared.originalPath,
3603
3974
  repoRelativePath: prepared.repoRelativePath,
3604
3975
  url: prepared.url,
3605
- storedPath: toPosix(path8.relative(rootDir, storedPath)),
3606
- extractedTextPath: extractedTextPath ? toPosix(path8.relative(rootDir, extractedTextPath)) : void 0,
3976
+ storedPath: toPosix(path9.relative(rootDir, storedPath)),
3977
+ extractedTextPath: extractedTextPath ? toPosix(path9.relative(rootDir, extractedTextPath)) : void 0,
3978
+ extractedMetadataPath: extractedMetadataPath ? toPosix(path9.relative(rootDir, extractedMetadataPath)) : void 0,
3979
+ extractionHash,
3607
3980
  mimeType: prepared.mimeType,
3608
3981
  contentHash,
3609
3982
  createdAt: previous?.createdAt ?? now,
3610
3983
  updatedAt: now,
3611
3984
  attachments: manifestAttachments.length ? manifestAttachments : void 0
3612
3985
  };
3613
- await writeJsonFile(path8.join(paths.manifestsDir, `${sourceId}.json`), manifest);
3986
+ await writeJsonFile(path9.join(paths.manifestsDir, `${sourceId}.json`), manifest);
3614
3987
  await appendLogEntry(rootDir, "ingest", prepared.title, [
3615
3988
  `source_id=${sourceId}`,
3616
3989
  `kind=${prepared.sourceKind}`,
@@ -3628,13 +4001,16 @@ async function persistPreparedInput(rootDir, prepared, paths) {
3628
4001
  return { manifest, isNew: !previous, wasUpdated: Boolean(previous) };
3629
4002
  }
3630
4003
  async function removeManifestArtifacts(rootDir, manifest, paths) {
3631
- await fs8.rm(path8.join(paths.manifestsDir, `${manifest.sourceId}.json`), { force: true });
3632
- await fs8.rm(path8.resolve(rootDir, manifest.storedPath), { force: true });
4004
+ await fs9.rm(path9.join(paths.manifestsDir, `${manifest.sourceId}.json`), { force: true });
4005
+ await fs9.rm(path9.resolve(rootDir, manifest.storedPath), { force: true });
3633
4006
  if (manifest.extractedTextPath) {
3634
- await fs8.rm(path8.resolve(rootDir, manifest.extractedTextPath), { force: true });
4007
+ await fs9.rm(path9.resolve(rootDir, manifest.extractedTextPath), { force: true });
4008
+ }
4009
+ if (manifest.extractedMetadataPath) {
4010
+ await fs9.rm(path9.resolve(rootDir, manifest.extractedMetadataPath), { force: true });
3635
4011
  }
3636
- await fs8.rm(path8.join(paths.rawAssetsDir, manifest.sourceId), { recursive: true, force: true });
3637
- await fs8.rm(path8.join(paths.analysesDir, `${manifest.sourceId}.json`), { force: true });
4012
+ await fs9.rm(path9.join(paths.rawAssetsDir, manifest.sourceId), { recursive: true, force: true });
4013
+ await fs9.rm(path9.join(paths.analysesDir, `${manifest.sourceId}.json`), { force: true });
3638
4014
  }
3639
4015
  function repoSyncWorkspaceIgnorePaths(rootDir, paths, repoRoot) {
3640
4016
  const candidates = [
@@ -3643,14 +4019,14 @@ function repoSyncWorkspaceIgnorePaths(rootDir, paths, repoRoot) {
3643
4019
  paths.stateDir,
3644
4020
  paths.agentDir,
3645
4021
  paths.inboxDir,
3646
- path8.join(rootDir, ".claude"),
3647
- path8.join(rootDir, ".cursor"),
3648
- path8.join(rootDir, ".obsidian")
4022
+ path9.join(rootDir, ".claude"),
4023
+ path9.join(rootDir, ".cursor"),
4024
+ path9.join(rootDir, ".obsidian")
3649
4025
  ];
3650
- return candidates.map((candidate) => path8.resolve(candidate)).filter((candidate, index, items) => items.indexOf(candidate) === index).filter((candidate) => withinRoot(repoRoot, candidate));
4026
+ return candidates.map((candidate) => path9.resolve(candidate)).filter((candidate, index, items) => items.indexOf(candidate) === index).filter((candidate) => withinRoot(repoRoot, candidate));
3651
4027
  }
3652
4028
  function preparedMatchesManifest(manifest, prepared, contentHash) {
3653
- return manifest.contentHash === contentHash && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
4029
+ return manifest.contentHash === contentHash && manifest.extractionHash === (prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact)) && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.sourceType === prepared.sourceType && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
3654
4030
  }
3655
4031
  function shouldDeferWatchSemanticRefresh(sourceKind) {
3656
4032
  return sourceKind === "markdown" || sourceKind === "text" || sourceKind === "html" || sourceKind === "pdf" || sourceKind === "image";
@@ -3669,16 +4045,16 @@ async function syncTrackedRepos(rootDir, options, repoRoots) {
3669
4045
  const normalizedOptions = normalizeIngestOptions(options);
3670
4046
  const manifests = await listManifests(rootDir);
3671
4047
  const trackedRoots = (repoRoots && repoRoots.length > 0 ? repoRoots : await listTrackedRepoRoots(rootDir)).map(
3672
- (item) => path8.resolve(item)
4048
+ (item) => path9.resolve(item)
3673
4049
  );
3674
4050
  const uniqueRoots = [...new Set(trackedRoots)].sort((left, right) => left.localeCompare(right));
3675
4051
  const manifestsByRepoRoot = /* @__PURE__ */ new Map();
3676
4052
  for (const manifest of manifests) {
3677
4053
  const repoRoot = repoRootFromManifest(manifest);
3678
- if (!repoRoot || !uniqueRoots.includes(path8.resolve(repoRoot))) {
4054
+ if (!repoRoot || !uniqueRoots.includes(path9.resolve(repoRoot))) {
3679
4055
  continue;
3680
4056
  }
3681
- const key = path8.resolve(repoRoot);
4057
+ const key = path9.resolve(repoRoot);
3682
4058
  const bucket = manifestsByRepoRoot.get(key) ?? [];
3683
4059
  bucket.push(manifest);
3684
4060
  manifestsByRepoRoot.set(key, bucket);
@@ -3703,12 +4079,12 @@ async function syncTrackedRepos(rootDir, options, repoRoots) {
3703
4079
  skipped.push(
3704
4080
  ...collected.skipped,
3705
4081
  ...collected.files.filter((absolutePath) => ignoreRoots.some((ignoreRoot) => withinRoot(ignoreRoot, absolutePath))).map((absolutePath) => ({
3706
- path: toPosix(path8.relative(rootDir, absolutePath)),
4082
+ path: toPosix(path9.relative(rootDir, absolutePath)),
3707
4083
  reason: "workspace_generated"
3708
4084
  }))
3709
4085
  );
3710
4086
  scannedCount += files.length;
3711
- const currentPaths = new Set(files.map((absolutePath) => path8.resolve(absolutePath)));
4087
+ const currentPaths = new Set(files.map((absolutePath) => path9.resolve(absolutePath)));
3712
4088
  for (const absolutePath of files) {
3713
4089
  const prepared = await prepareFileInput(rootDir, absolutePath, repoRoot);
3714
4090
  const result = await persistPreparedInput(rootDir, prepared, paths);
@@ -3719,7 +4095,7 @@ async function syncTrackedRepos(rootDir, options, repoRoots) {
3719
4095
  }
3720
4096
  }
3721
4097
  for (const manifest of repoManifests) {
3722
- const originalPath = manifest.originalPath ? path8.resolve(manifest.originalPath) : null;
4098
+ const originalPath = manifest.originalPath ? path9.resolve(manifest.originalPath) : null;
3723
4099
  if (originalPath && !currentPaths.has(originalPath)) {
3724
4100
  await removeManifestArtifacts(rootDir, manifest, paths);
3725
4101
  removed.push(manifest);
@@ -3727,7 +4103,7 @@ async function syncTrackedRepos(rootDir, options, repoRoots) {
3727
4103
  }
3728
4104
  }
3729
4105
  if (uniqueRoots.length > 0) {
3730
- await appendLogEntry(rootDir, "sync_repo", uniqueRoots.map((repoRoot) => toPosix(path8.relative(rootDir, repoRoot)) || ".").join(","), [
4106
+ await appendLogEntry(rootDir, "sync_repo", uniqueRoots.map((repoRoot) => toPosix(path9.relative(rootDir, repoRoot)) || ".").join(","), [
3731
4107
  `repo_roots=${uniqueRoots.length}`,
3732
4108
  `scanned=${scannedCount}`,
3733
4109
  `imported=${imported.length}`,
@@ -3750,16 +4126,16 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
3750
4126
  const normalizedOptions = normalizeIngestOptions(options);
3751
4127
  const manifests = await listManifests(rootDir);
3752
4128
  const trackedRoots = (repoRoots && repoRoots.length > 0 ? repoRoots : await listTrackedRepoRoots(rootDir)).map(
3753
- (item) => path8.resolve(item)
4129
+ (item) => path9.resolve(item)
3754
4130
  );
3755
4131
  const uniqueRoots = [...new Set(trackedRoots)].sort((left, right) => left.localeCompare(right));
3756
4132
  const manifestsByRepoRoot = /* @__PURE__ */ new Map();
3757
4133
  for (const manifest of manifests) {
3758
4134
  const repoRoot = repoRootFromManifest(manifest);
3759
- if (!repoRoot || !uniqueRoots.includes(path8.resolve(repoRoot))) {
4135
+ if (!repoRoot || !uniqueRoots.includes(path9.resolve(repoRoot))) {
3760
4136
  continue;
3761
4137
  }
3762
- const key = path8.resolve(repoRoot);
4138
+ const key = path9.resolve(repoRoot);
3763
4139
  const bucket = manifestsByRepoRoot.get(key) ?? [];
3764
4140
  bucket.push(manifest);
3765
4141
  manifestsByRepoRoot.set(key, bucket);
@@ -3774,7 +4150,7 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
3774
4150
  for (const repoRoot of uniqueRoots) {
3775
4151
  const repoManifests = manifestsByRepoRoot.get(repoRoot) ?? [];
3776
4152
  const manifestsByOriginalPath = new Map(
3777
- repoManifests.filter((manifest) => manifest.originalPath).map((manifest) => [path8.resolve(manifest.originalPath), manifest])
4153
+ repoManifests.filter((manifest) => manifest.originalPath).map((manifest) => [path9.resolve(manifest.originalPath), manifest])
3778
4154
  );
3779
4155
  if (!await fileExists(repoRoot)) {
3780
4156
  for (const manifest of repoManifests) {
@@ -3782,7 +4158,7 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
3782
4158
  pendingSemanticRefresh.push({
3783
4159
  id: pendingSemanticRefreshId("removed", repoRoot, manifest.repoRelativePath ?? manifest.storedPath),
3784
4160
  repoRoot,
3785
- path: toPosix(path8.relative(rootDir, manifest.originalPath ?? manifest.storedPath)),
4161
+ path: toPosix(path9.relative(rootDir, manifest.originalPath ?? manifest.storedPath)),
3786
4162
  changeType: "removed",
3787
4163
  detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
3788
4164
  sourceId: manifest.sourceId,
@@ -3802,16 +4178,16 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
3802
4178
  skipped.push(
3803
4179
  ...collected.skipped,
3804
4180
  ...collected.files.filter((absolutePath) => ignoreRoots.some((ignoreRoot) => withinRoot(ignoreRoot, absolutePath))).map((absolutePath) => ({
3805
- path: toPosix(path8.relative(rootDir, absolutePath)),
4181
+ path: toPosix(path9.relative(rootDir, absolutePath)),
3806
4182
  reason: "workspace_generated"
3807
4183
  }))
3808
4184
  );
3809
4185
  scannedCount += files.length;
3810
- const currentPaths = new Set(files.map((absolutePath) => path8.resolve(absolutePath)));
4186
+ const currentPaths = new Set(files.map((absolutePath) => path9.resolve(absolutePath)));
3811
4187
  for (const absolutePath of files) {
3812
4188
  const prepared = await prepareFileInput(rootDir, absolutePath, repoRoot);
3813
4189
  if (shouldDeferWatchSemanticRefresh(prepared.sourceKind)) {
3814
- const existing = manifestsByOriginalPath.get(path8.resolve(absolutePath));
4190
+ const existing = manifestsByOriginalPath.get(path9.resolve(absolutePath));
3815
4191
  const contentHash = buildCompositeHash(prepared.payloadBytes, prepared.attachments);
3816
4192
  const changed = !existing || !preparedMatchesManifest(existing, prepared, contentHash);
3817
4193
  if (changed) {
@@ -3819,10 +4195,10 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
3819
4195
  id: pendingSemanticRefreshId(
3820
4196
  existing ? "modified" : "added",
3821
4197
  repoRoot,
3822
- prepared.repoRelativePath ?? toPosix(path8.relative(repoRoot, absolutePath))
4198
+ prepared.repoRelativePath ?? toPosix(path9.relative(repoRoot, absolutePath))
3823
4199
  ),
3824
4200
  repoRoot,
3825
- path: toPosix(path8.relative(rootDir, absolutePath)),
4201
+ path: toPosix(path9.relative(rootDir, absolutePath)),
3826
4202
  changeType: existing ? "modified" : "added",
3827
4203
  detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
3828
4204
  sourceId: existing?.sourceId,
@@ -3842,13 +4218,13 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
3842
4218
  }
3843
4219
  }
3844
4220
  for (const manifest of repoManifests) {
3845
- const originalPath = manifest.originalPath ? path8.resolve(manifest.originalPath) : null;
4221
+ const originalPath = manifest.originalPath ? path9.resolve(manifest.originalPath) : null;
3846
4222
  if (originalPath && !currentPaths.has(originalPath)) {
3847
4223
  if (shouldDeferWatchSemanticRefresh(manifest.sourceKind)) {
3848
4224
  pendingSemanticRefresh.push({
3849
- id: pendingSemanticRefreshId("removed", repoRoot, manifest.repoRelativePath ?? toPosix(path8.relative(repoRoot, originalPath))),
4225
+ id: pendingSemanticRefreshId("removed", repoRoot, manifest.repoRelativePath ?? toPosix(path9.relative(repoRoot, originalPath))),
3850
4226
  repoRoot,
3851
- path: toPosix(path8.relative(rootDir, originalPath)),
4227
+ path: toPosix(path9.relative(rootDir, originalPath)),
3852
4228
  changeType: "removed",
3853
4229
  detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
3854
4230
  sourceId: manifest.sourceId,
@@ -3866,7 +4242,7 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
3866
4242
  await appendLogEntry(
3867
4243
  rootDir,
3868
4244
  "sync_repo_watch",
3869
- uniqueRoots.map((repoRoot) => toPosix(path8.relative(rootDir, repoRoot)) || ".").join(","),
4245
+ uniqueRoots.map((repoRoot) => toPosix(path9.relative(rootDir, repoRoot)) || ".").join(","),
3870
4246
  [
3871
4247
  `repo_roots=${uniqueRoots.length}`,
3872
4248
  `scanned=${scannedCount}`,
@@ -3891,19 +4267,36 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
3891
4267
  staleSourceIds: [...staleSourceIds]
3892
4268
  };
3893
4269
  }
3894
- async function prepareFileInput(_rootDir, absoluteInput, repoRoot) {
3895
- const payloadBytes = await fs8.readFile(absoluteInput);
4270
+ async function prepareFileInput(rootDir, absoluteInput, repoRoot) {
4271
+ const payloadBytes = await fs9.readFile(absoluteInput);
3896
4272
  const mimeType = guessMimeType(absoluteInput);
3897
4273
  const sourceKind = inferKind(mimeType, absoluteInput);
3898
4274
  const language = inferCodeLanguage(absoluteInput, mimeType);
3899
- const storedExtension = path8.extname(absoluteInput) || `.${mime.extension(mimeType) || "bin"}`;
4275
+ const storedExtension = path9.extname(absoluteInput) || `.${mime.extension(mimeType) || "bin"}`;
3900
4276
  let title;
3901
4277
  let extractedText;
4278
+ let extractionArtifact;
3902
4279
  if (sourceKind === "markdown" || sourceKind === "text" || sourceKind === "code") {
3903
4280
  extractedText = payloadBytes.toString("utf8");
3904
- title = titleFromText(path8.basename(absoluteInput, path8.extname(absoluteInput)), extractedText);
4281
+ title = titleFromText(path9.basename(absoluteInput, path9.extname(absoluteInput)), extractedText);
4282
+ extractionArtifact = createPlainTextExtractionArtifact(sourceKind, mimeType);
4283
+ } else if (sourceKind === "pdf") {
4284
+ title = path9.basename(absoluteInput, path9.extname(absoluteInput));
4285
+ const extracted = await extractPdfText({ mimeType, bytes: payloadBytes });
4286
+ extractedText = extracted.extractedText;
4287
+ extractionArtifact = extracted.artifact;
4288
+ } else if (sourceKind === "image") {
4289
+ title = path9.basename(absoluteInput, path9.extname(absoluteInput));
4290
+ const extracted = await extractImageWithVision(rootDir, {
4291
+ title,
4292
+ mimeType,
4293
+ filePath: absoluteInput
4294
+ });
4295
+ title = extracted.title?.trim() || title;
4296
+ extractedText = extracted.extractedText;
4297
+ extractionArtifact = extracted.artifact;
3905
4298
  } else {
3906
- title = path8.basename(absoluteInput, path8.extname(absoluteInput));
4299
+ title = path9.basename(absoluteInput, path9.extname(absoluteInput));
3907
4300
  }
3908
4301
  return {
3909
4302
  title,
@@ -3915,15 +4308,18 @@ async function prepareFileInput(_rootDir, absoluteInput, repoRoot) {
3915
4308
  mimeType,
3916
4309
  storedExtension,
3917
4310
  payloadBytes,
3918
- extractedText
4311
+ extractedText,
4312
+ extractionArtifact,
4313
+ extractionHash: buildExtractionHash(extractedText, extractionArtifact)
3919
4314
  };
3920
4315
  }
3921
- async function prepareUrlInput(input, options) {
4316
+ async function prepareUrlInput(rootDir, input, options) {
3922
4317
  const response = await fetch(input);
3923
4318
  if (!response.ok) {
3924
4319
  throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
3925
4320
  }
3926
- const inputUrl = new URL(input);
4321
+ const finalUrl = normalizeOriginUrl(response.url || input);
4322
+ const inputUrl = new URL(finalUrl);
3927
4323
  const originalPayloadBytes = Buffer.from(await response.arrayBuffer());
3928
4324
  let payloadBytes = originalPayloadBytes;
3929
4325
  let mimeType = resolveUrlMimeType(input, response);
@@ -3932,18 +4328,19 @@ async function prepareUrlInput(input, options) {
3932
4328
  let storedExtension = ".bin";
3933
4329
  let title = inputUrl.hostname + inputUrl.pathname;
3934
4330
  let extractedText;
4331
+ let extractionArtifact;
3935
4332
  let attachments;
3936
4333
  let contentHash;
3937
4334
  const logDetails = [];
3938
4335
  if (sourceKind === "html" || mimeType.startsWith("text/html")) {
3939
4336
  const html = originalPayloadBytes.toString("utf8");
3940
- const initialConversion = await convertHtmlToMarkdown(html, input);
4337
+ const initialConversion = await convertHtmlToMarkdown(html, finalUrl);
3941
4338
  title = initialConversion.title;
3942
4339
  let localizedHtml = html;
3943
4340
  let localAssetReplacements;
3944
4341
  if (options.includeAssets) {
3945
4342
  const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
3946
- extractHtmlImageReferences(html, input),
4343
+ extractHtmlImageReferences(html, finalUrl),
3947
4344
  options
3948
4345
  );
3949
4346
  if (remoteAttachments.length) {
@@ -3953,18 +4350,19 @@ async function prepareUrlInput(input, options) {
3953
4350
  localAssetReplacements = new Map(
3954
4351
  remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
3955
4352
  );
3956
- localizedHtml = rewriteHtmlImageReferences(html, input, localAssetReplacements);
4353
+ localizedHtml = rewriteHtmlImageReferences(html, finalUrl, localAssetReplacements);
3957
4354
  logDetails.push(`remote_assets=${remoteAttachments.length}`);
3958
4355
  }
3959
4356
  if (skippedCount) {
3960
4357
  logDetails.push(`remote_asset_skips=${skippedCount}`);
3961
4358
  }
3962
4359
  }
3963
- const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml, input);
4360
+ const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml, finalUrl);
3964
4361
  extractedText = converted.markdown;
4362
+ extractionArtifact = createHtmlReadabilityExtractionArtifact("markdown", "text/markdown");
3965
4363
  if (localAssetReplacements?.size) {
3966
4364
  const absoluteLocalAssetReplacements = new Map(
3967
- [...localAssetReplacements.values()].map((replacement) => [new URL(replacement, input).toString(), replacement])
4365
+ [...localAssetReplacements.values()].map((replacement) => [new URL(replacement, finalUrl).toString(), replacement])
3968
4366
  );
3969
4367
  extractedText = rewriteMarkdownImageTargets(extractedText, absoluteLocalAssetReplacements);
3970
4368
  }
@@ -3973,14 +4371,15 @@ async function prepareUrlInput(input, options) {
3973
4371
  sourceKind = "markdown";
3974
4372
  storedExtension = ".md";
3975
4373
  } else {
3976
- const extension = path8.extname(inputUrl.pathname);
4374
+ const extension = path9.extname(inputUrl.pathname);
3977
4375
  storedExtension = extension || `.${mime.extension(mimeType) || "bin"}`;
3978
4376
  if (sourceKind === "markdown" || sourceKind === "text" || sourceKind === "code") {
3979
4377
  extractedText = payloadBytes.toString("utf8");
3980
4378
  title = titleFromText(title || inputUrl.hostname, extractedText);
4379
+ extractionArtifact = createPlainTextExtractionArtifact(sourceKind, mimeType);
3981
4380
  if (sourceKind === "markdown" && options.includeAssets) {
3982
4381
  const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
3983
- extractMarkdownImageReferences(extractedText, input),
4382
+ extractMarkdownImageReferences(extractedText, finalUrl),
3984
4383
  options
3985
4384
  );
3986
4385
  if (remoteAttachments.length) {
@@ -3990,7 +4389,7 @@ async function prepareUrlInput(input, options) {
3990
4389
  const replacements = new Map(
3991
4390
  remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
3992
4391
  );
3993
- extractedText = rewriteMarkdownImageReferences(extractedText, input, replacements);
4392
+ extractedText = rewriteMarkdownImageReferences(extractedText, finalUrl, replacements);
3994
4393
  payloadBytes = Buffer.from(extractedText, "utf8");
3995
4394
  logDetails.push(`remote_assets=${remoteAttachments.length}`);
3996
4395
  }
@@ -3998,6 +4397,19 @@ async function prepareUrlInput(input, options) {
3998
4397
  logDetails.push(`remote_asset_skips=${skippedCount}`);
3999
4398
  }
4000
4399
  }
4400
+ } else if (sourceKind === "pdf") {
4401
+ const extracted = await extractPdfText({ mimeType, bytes: payloadBytes });
4402
+ extractedText = extracted.extractedText;
4403
+ extractionArtifact = extracted.artifact;
4404
+ } else if (sourceKind === "image") {
4405
+ const extracted = await extractImageWithVision(rootDir, {
4406
+ title,
4407
+ mimeType,
4408
+ bytes: payloadBytes
4409
+ });
4410
+ title = extracted.title?.trim() || title;
4411
+ extractedText = extracted.extractedText;
4412
+ extractionArtifact = extracted.artifact;
4001
4413
  }
4002
4414
  }
4003
4415
  return {
@@ -4005,11 +4417,13 @@ async function prepareUrlInput(input, options) {
4005
4417
  originType: "url",
4006
4418
  sourceKind,
4007
4419
  language,
4008
- url: input,
4420
+ url: finalUrl,
4009
4421
  mimeType,
4010
4422
  storedExtension,
4011
4423
  payloadBytes,
4012
4424
  extractedText,
4425
+ extractionArtifact,
4426
+ extractionHash: buildExtractionHash(extractedText, extractionArtifact),
4013
4427
  attachments,
4014
4428
  contentHash,
4015
4429
  logDetails
@@ -4023,14 +4437,14 @@ async function collectInboxAttachmentRefs(inputDir, files) {
4023
4437
  if (sourceKind !== "markdown") {
4024
4438
  continue;
4025
4439
  }
4026
- const content = await fs8.readFile(absolutePath, "utf8");
4440
+ const content = await fs9.readFile(absolutePath, "utf8");
4027
4441
  const refs = extractMarkdownReferences(content);
4028
4442
  if (!refs.length) {
4029
4443
  continue;
4030
4444
  }
4031
4445
  const sourceRefs = [];
4032
4446
  for (const ref of refs) {
4033
- const resolved = path8.resolve(path8.dirname(absolutePath), ref);
4447
+ const resolved = path9.resolve(path9.dirname(absolutePath), ref);
4034
4448
  if (!resolved.startsWith(inputDir) || !await fileExists(resolved)) {
4035
4449
  continue;
4036
4450
  }
@@ -4064,12 +4478,12 @@ function rewriteMarkdownReferences(content, replacements) {
4064
4478
  });
4065
4479
  }
4066
4480
  async function prepareInboxMarkdownInput(absolutePath, attachmentRefs) {
4067
- const originalBytes = await fs8.readFile(absolutePath);
4481
+ const originalBytes = await fs9.readFile(absolutePath);
4068
4482
  const originalText = originalBytes.toString("utf8");
4069
- const title = titleFromText(path8.basename(absolutePath, path8.extname(absolutePath)), originalText);
4483
+ const title = titleFromText(path9.basename(absolutePath, path9.extname(absolutePath)), originalText);
4070
4484
  const attachments = [];
4071
4485
  for (const attachmentRef of attachmentRefs) {
4072
- const bytes = await fs8.readFile(attachmentRef.absolutePath);
4486
+ const bytes = await fs9.readFile(attachmentRef.absolutePath);
4073
4487
  attachments.push({
4074
4488
  relativePath: sanitizeAssetRelativePath(attachmentRef.relativeRef),
4075
4489
  mimeType: guessMimeType(attachmentRef.absolutePath),
@@ -4086,15 +4500,18 @@ async function prepareInboxMarkdownInput(absolutePath, attachmentRefs) {
4086
4500
  ])
4087
4501
  );
4088
4502
  const rewrittenText = rewriteMarkdownReferences(originalText, replacements);
4503
+ const extractionArtifact = createPlainTextExtractionArtifact("markdown", "text/markdown");
4089
4504
  return {
4090
4505
  title,
4091
4506
  originType: "file",
4092
4507
  sourceKind: "markdown",
4093
4508
  originalPath: toPosix(absolutePath),
4094
4509
  mimeType: "text/markdown",
4095
- storedExtension: path8.extname(absolutePath) || ".md",
4510
+ storedExtension: path9.extname(absolutePath) || ".md",
4096
4511
  payloadBytes: Buffer.from(rewrittenText, "utf8"),
4097
4512
  extractedText: rewrittenText,
4513
+ extractionArtifact,
4514
+ extractionHash: buildExtractionHash(rewrittenText, extractionArtifact),
4098
4515
  attachments,
4099
4516
  contentHash
4100
4517
  };
@@ -4105,16 +4522,16 @@ function isSupportedInboxKind(sourceKind) {
4105
4522
  async function ingestInput(rootDir, input, options) {
4106
4523
  const { paths } = await initWorkspace(rootDir);
4107
4524
  const normalizedOptions = normalizeIngestOptions(options);
4108
- const absoluteInput = path8.resolve(rootDir, input);
4109
- const repoRoot = isHttpUrl(input) || normalizedOptions.repoRoot ? normalizedOptions.repoRoot : await findNearestGitRoot2(absoluteInput).then((value) => value ?? path8.dirname(absoluteInput));
4110
- const prepared = isHttpUrl(input) ? await prepareUrlInput(input, normalizedOptions) : await prepareFileInput(rootDir, absoluteInput, repoRoot);
4525
+ const absoluteInput = path9.resolve(rootDir, input);
4526
+ const repoRoot = isHttpUrl(input) || normalizedOptions.repoRoot ? normalizedOptions.repoRoot : await findNearestGitRoot2(absoluteInput).then((value) => value ?? path9.dirname(absoluteInput));
4527
+ const prepared = isHttpUrl(input) ? await prepareUrlInput(rootDir, input, normalizedOptions) : await prepareFileInput(rootDir, absoluteInput, repoRoot);
4111
4528
  const result = await persistPreparedInput(rootDir, prepared, paths);
4112
4529
  return result.manifest;
4113
4530
  }
4114
4531
  async function addInput(rootDir, input, options = {}) {
4115
4532
  const { paths } = await initWorkspace(rootDir);
4116
- if (!isHttpUrl(input) && !arxivIdFromInput(input)) {
4117
- throw new Error("`swarmvault add` only supports URLs and bare arXiv ids in the current release.");
4533
+ if (!isHttpUrl(input) && !arxivIdFromInput(input) && !doiFromInput(input)) {
4534
+ throw new Error("`swarmvault add` only supports URLs, bare arXiv ids, and bare DOI strings in the current release.");
4118
4535
  }
4119
4536
  let prepared = null;
4120
4537
  let captureType = "url";
@@ -4127,26 +4544,55 @@ async function addInput(rootDir, input, options = {}) {
4127
4544
  title: captured.title,
4128
4545
  url: captured.normalizedUrl,
4129
4546
  markdown: captured.markdown,
4547
+ sourceType: "arxiv",
4130
4548
  logDetails: ["capture_type=arxiv"]
4131
4549
  });
4132
4550
  captureType = "arxiv";
4133
4551
  normalizedUrl = captured.normalizedUrl;
4552
+ } else if (doiFromInput(input)) {
4553
+ const captured = await captureDoiMarkdown(rootDir, input, options);
4554
+ prepared = prepareCapturedMarkdownInput({
4555
+ title: captured.title,
4556
+ url: captured.normalizedUrl,
4557
+ markdown: captured.markdown,
4558
+ sourceType: "doi",
4559
+ attachments: captured.attachments,
4560
+ logDetails: ["capture_type=doi"]
4561
+ });
4562
+ captureType = "doi";
4563
+ normalizedUrl = captured.normalizedUrl;
4134
4564
  } else if (isTweetUrl(input)) {
4135
4565
  const captured = await captureTweetMarkdown(input, options);
4136
4566
  prepared = prepareCapturedMarkdownInput({
4137
4567
  title: captured.title,
4138
4568
  url: captured.normalizedUrl,
4139
4569
  markdown: captured.markdown,
4570
+ sourceType: "tweet",
4140
4571
  logDetails: ["capture_type=tweet"]
4141
4572
  });
4142
4573
  captureType = "tweet";
4143
4574
  normalizedUrl = captured.normalizedUrl;
4575
+ } else if (isHttpUrl(input)) {
4576
+ const captured = await captureArticleMarkdown(rootDir, input, options, {
4577
+ sourceType: "article",
4578
+ sourceUrl: input
4579
+ });
4580
+ prepared = prepareCapturedMarkdownInput({
4581
+ title: captured.title,
4582
+ url: captured.normalizedUrl,
4583
+ markdown: captured.markdown,
4584
+ sourceType: "article",
4585
+ attachments: captured.attachments,
4586
+ logDetails: ["capture_type=article"]
4587
+ });
4588
+ captureType = "article";
4589
+ normalizedUrl = captured.normalizedUrl;
4144
4590
  }
4145
4591
  } catch {
4146
4592
  fallback = true;
4147
4593
  }
4148
4594
  if (!prepared) {
4149
- normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : normalizeOriginUrl(input);
4595
+ normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : doiFromInput(input) ? `https://doi.org/${encodeURIComponent(doiFromInput(input) ?? "")}` : normalizeOriginUrl(input);
4150
4596
  return {
4151
4597
  captureType: "url",
4152
4598
  manifest: await ingestInput(rootDir, normalizedUrl, options),
@@ -4167,7 +4613,7 @@ async function addInput(rootDir, input, options = {}) {
4167
4613
  async function ingestDirectory(rootDir, inputDir, options) {
4168
4614
  const { paths } = await initWorkspace(rootDir);
4169
4615
  const normalizedOptions = normalizeIngestOptions(options);
4170
- const absoluteInputDir = path8.resolve(rootDir, inputDir);
4616
+ const absoluteInputDir = path9.resolve(rootDir, inputDir);
4171
4617
  const repoRoot = normalizedOptions.repoRoot ?? await findNearestGitRoot2(absoluteInputDir) ?? absoluteInputDir;
4172
4618
  if (!await fileExists(absoluteInputDir)) {
4173
4619
  throw new Error(`Directory not found: ${absoluteInputDir}`);
@@ -4183,11 +4629,11 @@ async function ingestDirectory(rootDir, inputDir, options) {
4183
4629
  } else if (result.wasUpdated) {
4184
4630
  updated.push(result.manifest);
4185
4631
  } else {
4186
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: "duplicate_content" });
4632
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "duplicate_content" });
4187
4633
  }
4188
4634
  }
4189
- await appendLogEntry(rootDir, "ingest_directory", toPosix(path8.relative(rootDir, absoluteInputDir)) || ".", [
4190
- `repo_root=${toPosix(path8.relative(rootDir, repoRoot)) || "."}`,
4635
+ await appendLogEntry(rootDir, "ingest_directory", toPosix(path9.relative(rootDir, absoluteInputDir)) || ".", [
4636
+ `repo_root=${toPosix(path9.relative(rootDir, repoRoot)) || "."}`,
4191
4637
  `scanned=${files.length}`,
4192
4638
  `imported=${imported.length}`,
4193
4639
  `updated=${updated.length}`,
@@ -4204,7 +4650,7 @@ async function ingestDirectory(rootDir, inputDir, options) {
4204
4650
  }
4205
4651
  async function importInbox(rootDir, inputDir) {
4206
4652
  const { paths } = await initWorkspace(rootDir);
4207
- const effectiveInputDir = path8.resolve(rootDir, inputDir ?? paths.inboxDir);
4653
+ const effectiveInputDir = path9.resolve(rootDir, inputDir ?? paths.inboxDir);
4208
4654
  if (!await fileExists(effectiveInputDir)) {
4209
4655
  throw new Error(`Inbox directory not found: ${effectiveInputDir}`);
4210
4656
  }
@@ -4215,31 +4661,31 @@ async function importInbox(rootDir, inputDir) {
4215
4661
  const skipped = [];
4216
4662
  let attachmentCount = 0;
4217
4663
  for (const absolutePath of files) {
4218
- const basename = path8.basename(absolutePath);
4664
+ const basename = path9.basename(absolutePath);
4219
4665
  if (basename.startsWith(".")) {
4220
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: "hidden_file" });
4666
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "hidden_file" });
4221
4667
  continue;
4222
4668
  }
4223
4669
  if (claimedAttachments.has(absolutePath)) {
4224
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: "referenced_attachment" });
4670
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "referenced_attachment" });
4225
4671
  continue;
4226
4672
  }
4227
4673
  const mimeType = guessMimeType(absolutePath);
4228
4674
  const sourceKind = inferKind(mimeType, absolutePath);
4229
4675
  if (!isSupportedInboxKind(sourceKind)) {
4230
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
4676
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
4231
4677
  continue;
4232
4678
  }
4233
4679
  const prepared = sourceKind === "markdown" && refsBySource.has(absolutePath) ? await prepareInboxMarkdownInput(absolutePath, refsBySource.get(absolutePath) ?? []) : await prepareFileInput(rootDir, absolutePath);
4234
4680
  const result = await persistPreparedInput(rootDir, prepared, paths);
4235
4681
  if (!result.isNew) {
4236
- skipped.push({ path: toPosix(path8.relative(rootDir, absolutePath)), reason: "duplicate_content" });
4682
+ skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "duplicate_content" });
4237
4683
  continue;
4238
4684
  }
4239
4685
  attachmentCount += result.manifest.attachments?.length ?? 0;
4240
4686
  imported.push(result.manifest);
4241
4687
  }
4242
- await appendLogEntry(rootDir, "inbox_import", toPosix(path8.relative(rootDir, effectiveInputDir)) || ".", [
4688
+ await appendLogEntry(rootDir, "inbox_import", toPosix(path9.relative(rootDir, effectiveInputDir)) || ".", [
4243
4689
  `scanned=${files.length}`,
4244
4690
  `imported=${imported.length}`,
4245
4691
  `attachments=${attachmentCount}`,
@@ -4258,9 +4704,9 @@ async function listManifests(rootDir) {
4258
4704
  if (!await fileExists(paths.manifestsDir)) {
4259
4705
  return [];
4260
4706
  }
4261
- const entries = await fs8.readdir(paths.manifestsDir);
4707
+ const entries = await fs9.readdir(paths.manifestsDir);
4262
4708
  const manifests = await Promise.all(
4263
- entries.filter((entry) => entry.endsWith(".json")).map((entry) => readJsonFile(path8.join(paths.manifestsDir, entry)))
4709
+ entries.filter((entry) => entry.endsWith(".json")).map((entry) => readJsonFile(path9.join(paths.manifestsDir, entry)))
4264
4710
  );
4265
4711
  return manifests.filter((manifest) => Boolean(manifest));
4266
4712
  }
@@ -4268,28 +4714,38 @@ async function readExtractedText(rootDir, manifest) {
4268
4714
  if (!manifest.extractedTextPath) {
4269
4715
  return void 0;
4270
4716
  }
4271
- const absolutePath = path8.resolve(rootDir, manifest.extractedTextPath);
4717
+ const absolutePath = path9.resolve(rootDir, manifest.extractedTextPath);
4718
+ if (!await fileExists(absolutePath)) {
4719
+ return void 0;
4720
+ }
4721
+ return fs9.readFile(absolutePath, "utf8");
4722
+ }
4723
+ async function readExtractionArtifact(rootDir, manifest) {
4724
+ if (!manifest.extractedMetadataPath) {
4725
+ return void 0;
4726
+ }
4727
+ const absolutePath = path9.resolve(rootDir, manifest.extractedMetadataPath);
4272
4728
  if (!await fileExists(absolutePath)) {
4273
4729
  return void 0;
4274
4730
  }
4275
- return fs8.readFile(absolutePath, "utf8");
4731
+ return await readJsonFile(absolutePath) ?? void 0;
4276
4732
  }
4277
4733
 
4278
4734
  // src/mcp.ts
4279
- import fs15 from "fs/promises";
4280
- import path18 from "path";
4735
+ import fs16 from "fs/promises";
4736
+ import path19 from "path";
4281
4737
  import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
4282
4738
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4283
- import { z as z7 } from "zod";
4739
+ import { z as z8 } from "zod";
4284
4740
 
4285
4741
  // src/schema.ts
4286
- import fs9 from "fs/promises";
4287
- import path9 from "path";
4742
+ import fs10 from "fs/promises";
4743
+ import path10 from "path";
4288
4744
  function normalizeSchemaContent(content) {
4289
4745
  return content.trim() ? content.trim() : defaultVaultSchema().trim();
4290
4746
  }
4291
4747
  async function readSchemaFile(schemaPath, fallback = defaultVaultSchema()) {
4292
- const content = await fileExists(schemaPath) ? await fs9.readFile(schemaPath, "utf8") : fallback;
4748
+ const content = await fileExists(schemaPath) ? await fs10.readFile(schemaPath, "utf8") : fallback;
4293
4749
  const normalized = normalizeSchemaContent(content);
4294
4750
  return {
4295
4751
  path: schemaPath,
@@ -4298,7 +4754,7 @@ async function readSchemaFile(schemaPath, fallback = defaultVaultSchema()) {
4298
4754
  };
4299
4755
  }
4300
4756
  function resolveProjectSchemaPath(rootDir, schemaPath) {
4301
- return path9.resolve(rootDir, schemaPath);
4757
+ return path10.resolve(rootDir, schemaPath);
4302
4758
  }
4303
4759
  function composeVaultSchema(root, projectSchemas = []) {
4304
4760
  if (!projectSchemas.length) {
@@ -4314,7 +4770,7 @@ function composeVaultSchema(root, projectSchemas = []) {
4314
4770
  (schema) => [
4315
4771
  `## Project Schema`,
4316
4772
  "",
4317
- `Path: ${toPosix(path9.relative(path9.dirname(root.path), schema.path) || schema.path)}`,
4773
+ `Path: ${toPosix(path10.relative(path10.dirname(root.path), schema.path) || schema.path)}`,
4318
4774
  "",
4319
4775
  schema.content
4320
4776
  ].join("\n")
@@ -4390,30 +4846,30 @@ function buildSchemaPrompt(schema, instruction) {
4390
4846
  }
4391
4847
 
4392
4848
  // src/vault.ts
4393
- import fs14 from "fs/promises";
4394
- import path17 from "path";
4395
- import matter8 from "gray-matter";
4396
- import { z as z6 } from "zod";
4849
+ import fs15 from "fs/promises";
4850
+ import path18 from "path";
4851
+ import matter9 from "gray-matter";
4852
+ import { z as z7 } from "zod";
4397
4853
 
4398
4854
  // src/analysis.ts
4399
- import path10 from "path";
4400
- import { z } from "zod";
4401
- var ANALYSIS_FORMAT_VERSION = 4;
4402
- var sourceAnalysisSchema = z.object({
4403
- title: z.string().min(1),
4404
- summary: z.string().min(1),
4405
- concepts: z.array(z.object({ name: z.string().min(1), description: z.string().default("") })).max(12).default([]),
4406
- entities: z.array(z.object({ name: z.string().min(1), description: z.string().default("") })).max(12).default([]),
4407
- claims: z.array(
4408
- z.object({
4409
- text: z.string().min(1),
4410
- confidence: z.number().min(0).max(1).default(0.6),
4411
- status: z.enum(["extracted", "inferred", "conflicted", "stale"]).default("extracted"),
4412
- polarity: z.enum(["positive", "negative", "neutral"]).default("neutral"),
4413
- citation: z.string().min(1)
4855
+ import path11 from "path";
4856
+ import { z as z2 } from "zod";
4857
+ var ANALYSIS_FORMAT_VERSION = 5;
4858
+ var sourceAnalysisSchema = z2.object({
4859
+ title: z2.string().min(1),
4860
+ summary: z2.string().min(1),
4861
+ concepts: z2.array(z2.object({ name: z2.string().min(1), description: z2.string().default("") })).max(12).default([]),
4862
+ entities: z2.array(z2.object({ name: z2.string().min(1), description: z2.string().default("") })).max(12).default([]),
4863
+ claims: z2.array(
4864
+ z2.object({
4865
+ text: z2.string().min(1),
4866
+ confidence: z2.number().min(0).max(1).default(0.6),
4867
+ status: z2.enum(["extracted", "inferred", "conflicted", "stale"]).default("extracted"),
4868
+ polarity: z2.enum(["positive", "negative", "neutral"]).default("neutral"),
4869
+ citation: z2.string().min(1)
4414
4870
  })
4415
4871
  ).max(8).default([]),
4416
- questions: z.array(z.string()).max(6).default([])
4872
+ questions: z2.array(z2.string()).max(6).default([])
4417
4873
  });
4418
4874
  var STOPWORDS = /* @__PURE__ */ new Set([
4419
4875
  "about",
@@ -4502,6 +4958,7 @@ function heuristicAnalysis(manifest, text, schemaHash) {
4502
4958
  analysisVersion: ANALYSIS_FORMAT_VERSION,
4503
4959
  sourceId: manifest.sourceId,
4504
4960
  sourceHash: manifest.contentHash,
4961
+ extractionHash: manifest.extractionHash,
4505
4962
  schemaHash,
4506
4963
  title: deriveTitle(manifest, text),
4507
4964
  summary: firstSentences(normalized, 3) || truncate(normalized, 280) || `Imported ${manifest.sourceKind} source.`,
@@ -4548,6 +5005,7 @@ ${truncate(text, 18e3)}`
4548
5005
  analysisVersion: ANALYSIS_FORMAT_VERSION,
4549
5006
  sourceId: manifest.sourceId,
4550
5007
  sourceHash: manifest.contentHash,
5008
+ extractionHash: manifest.extractionHash,
4551
5009
  schemaHash: schema.hash,
4552
5010
  title: parsed.title,
4553
5011
  summary: parsed.summary,
@@ -4574,24 +5032,97 @@ ${truncate(text, 18e3)}`
4574
5032
  producedAt: (/* @__PURE__ */ new Date()).toISOString()
4575
5033
  };
4576
5034
  }
5035
+ function analysisFromVisionExtraction(manifest, extraction, schemaHash) {
5036
+ if (!extraction.vision) {
5037
+ return null;
5038
+ }
5039
+ return {
5040
+ analysisVersion: ANALYSIS_FORMAT_VERSION,
5041
+ sourceId: manifest.sourceId,
5042
+ sourceHash: manifest.contentHash,
5043
+ extractionHash: manifest.extractionHash,
5044
+ schemaHash,
5045
+ title: extraction.vision.title?.trim() || manifest.title,
5046
+ summary: extraction.vision.summary,
5047
+ concepts: extraction.vision.concepts.map((term) => ({
5048
+ id: `concept:${slugify(term.name)}`,
5049
+ name: term.name,
5050
+ description: term.description
5051
+ })),
5052
+ entities: extraction.vision.entities.map((term) => ({
5053
+ id: `entity:${slugify(term.name)}`,
5054
+ name: term.name,
5055
+ description: term.description
5056
+ })),
5057
+ claims: extraction.vision.claims.map((claim, index) => ({
5058
+ id: `claim:${manifest.sourceId}:${index + 1}`,
5059
+ text: claim.text,
5060
+ confidence: claim.confidence,
5061
+ status: "extracted",
5062
+ polarity: claim.polarity,
5063
+ citation: manifest.sourceId
5064
+ })),
5065
+ questions: extraction.vision.questions,
5066
+ rationales: [],
5067
+ producedAt: (/* @__PURE__ */ new Date()).toISOString()
5068
+ };
5069
+ }
5070
+ function extractionWarningSummary(manifest, extraction) {
5071
+ const warning = extraction?.warnings?.find(Boolean);
5072
+ if (warning) {
5073
+ return `Imported ${manifest.sourceKind} source. ${warning}`;
5074
+ }
5075
+ return `Imported ${manifest.sourceKind} source. Text extraction is not yet available for this source.`;
5076
+ }
4577
5077
  async function analyzeSource(manifest, extractedText, provider, paths, schema) {
4578
- const cachePath = path10.join(paths.analysesDir, `${manifest.sourceId}.json`);
5078
+ const cachePath = path11.join(paths.analysesDir, `${manifest.sourceId}.json`);
4579
5079
  const cached = await readJsonFile(cachePath);
4580
- if (cached && cached.analysisVersion === ANALYSIS_FORMAT_VERSION && cached.sourceHash === manifest.contentHash && cached.schemaHash === schema.hash) {
5080
+ if (cached && cached.analysisVersion === ANALYSIS_FORMAT_VERSION && cached.sourceHash === manifest.contentHash && cached.extractionHash === manifest.extractionHash && cached.schemaHash === schema.hash) {
4581
5081
  return cached;
4582
5082
  }
5083
+ const extraction = await readExtractionArtifact(paths.rootDir, manifest);
4583
5084
  const content = normalizeWhitespace(extractedText ?? "");
4584
5085
  let analysis;
4585
5086
  if (manifest.sourceKind === "code" && content) {
4586
5087
  analysis = await analyzeCodeSource(manifest, extractedText ?? "", schema.hash);
5088
+ } else if (manifest.sourceKind === "image") {
5089
+ const visionAnalysis = extraction ? analysisFromVisionExtraction(manifest, extraction, schema.hash) : null;
5090
+ if (visionAnalysis) {
5091
+ analysis = visionAnalysis;
5092
+ } else if (!content) {
5093
+ analysis = {
5094
+ analysisVersion: ANALYSIS_FORMAT_VERSION,
5095
+ sourceId: manifest.sourceId,
5096
+ sourceHash: manifest.contentHash,
5097
+ extractionHash: manifest.extractionHash,
5098
+ schemaHash: schema.hash,
5099
+ title: manifest.title,
5100
+ summary: extractionWarningSummary(manifest, extraction),
5101
+ concepts: [],
5102
+ entities: [],
5103
+ claims: [],
5104
+ questions: [],
5105
+ rationales: [],
5106
+ producedAt: (/* @__PURE__ */ new Date()).toISOString()
5107
+ };
5108
+ } else if (provider.type === "heuristic") {
5109
+ analysis = heuristicAnalysis(manifest, content, schema.hash);
5110
+ } else {
5111
+ try {
5112
+ analysis = await providerAnalysis(manifest, content, provider, schema);
5113
+ } catch {
5114
+ analysis = heuristicAnalysis(manifest, content, schema.hash);
5115
+ }
5116
+ }
4587
5117
  } else if (!content) {
4588
5118
  analysis = {
4589
5119
  analysisVersion: ANALYSIS_FORMAT_VERSION,
4590
5120
  sourceId: manifest.sourceId,
4591
5121
  sourceHash: manifest.contentHash,
5122
+ extractionHash: manifest.extractionHash,
4592
5123
  schemaHash: schema.hash,
4593
5124
  title: manifest.title,
4594
- summary: `Imported ${manifest.sourceKind} source. Text extraction is not yet available for this source.`,
5125
+ summary: extractionWarningSummary(manifest, extraction),
4595
5126
  concepts: [],
4596
5127
  entities: [],
4597
5128
  claims: [],
@@ -4624,6 +5155,7 @@ var DEFAULT_BENCHMARK_QUESTIONS = [
4624
5155
  "Where are the biggest knowledge gaps?",
4625
5156
  "What evidence should I read first?"
4626
5157
  ];
5158
+ var RESEARCH_BENCHMARK_QUESTION = "Which research sources should I read first, and why?";
4627
5159
  function nodeMap(graph) {
4628
5160
  return new Map(graph.nodes.map((node) => [node.id, node]));
4629
5161
  }
@@ -4673,9 +5205,68 @@ function benchmarkQueryTokens(graph, queryResult, pageContentsById) {
4673
5205
  queryTokens,
4674
5206
  reduction: 0,
4675
5207
  visitedNodeIds: queryResult.visitedNodeIds,
5208
+ visitedEdgeIds: queryResult.visitedEdgeIds,
4676
5209
  pageIds: queryResult.pageIds
4677
5210
  };
4678
5211
  }
5212
+ function graphHash(graph) {
5213
+ const hashedPages = graph.pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary");
5214
+ const normalized = JSON.stringify(
5215
+ {
5216
+ nodes: [...graph.nodes].map((node) => ({
5217
+ id: node.id,
5218
+ type: node.type,
5219
+ label: node.label,
5220
+ pageId: node.pageId ?? null,
5221
+ communityId: node.communityId ?? null,
5222
+ degree: node.degree ?? null,
5223
+ bridgeScore: node.bridgeScore ?? null,
5224
+ isGodNode: node.isGodNode ?? false,
5225
+ sourceIds: [...node.sourceIds].sort(),
5226
+ projectIds: [...node.projectIds].sort()
5227
+ })).sort((left, right) => left.id.localeCompare(right.id)),
5228
+ edges: [...graph.edges].map((edge) => ({
5229
+ id: edge.id,
5230
+ source: edge.source,
5231
+ target: edge.target,
5232
+ relation: edge.relation,
5233
+ status: edge.status,
5234
+ evidenceClass: edge.evidenceClass,
5235
+ confidence: edge.confidence,
5236
+ provenance: [...edge.provenance].sort()
5237
+ })).sort((left, right) => left.id.localeCompare(right.id)),
5238
+ pages: [...hashedPages].map((page) => ({
5239
+ id: page.id,
5240
+ path: page.path,
5241
+ kind: page.kind,
5242
+ status: page.status,
5243
+ sourceType: page.sourceType ?? null,
5244
+ sourceIds: [...page.sourceIds].sort(),
5245
+ projectIds: [...page.projectIds].sort(),
5246
+ nodeIds: [...page.nodeIds].sort()
5247
+ })).sort((left, right) => left.id.localeCompare(right.id)),
5248
+ communities: [...graph.communities ?? []].map((community) => ({
5249
+ id: community.id,
5250
+ label: community.label,
5251
+ nodeIds: [...community.nodeIds].sort()
5252
+ })).sort((left, right) => left.id.localeCompare(right.id))
5253
+ },
5254
+ null,
5255
+ 0
5256
+ );
5257
+ return sha256(normalized);
5258
+ }
5259
+ function hasResearchSources(pages) {
5260
+ return pages.some((page) => page.kind === "source" && Boolean(page.sourceType) && page.sourceType !== "url");
5261
+ }
5262
+ function defaultBenchmarkQuestionsForGraph(graph, maxQuestions = 3) {
5263
+ const normalizedLimit = Math.max(1, Math.min(maxQuestions, DEFAULT_BENCHMARK_QUESTIONS.length));
5264
+ const questions = [...DEFAULT_BENCHMARK_QUESTIONS];
5265
+ if (hasResearchSources(graph.pages)) {
5266
+ questions.unshift(RESEARCH_BENCHMARK_QUESTION);
5267
+ }
5268
+ return uniqueBy(questions, (item) => item).slice(0, normalizedLimit);
5269
+ }
4679
5270
  function buildBenchmarkArtifact(input) {
4680
5271
  const corpusTokens = Math.max(1, Math.round(input.corpusWords * (100 / 75)));
4681
5272
  const perQuestion = input.perQuestion.filter((entry) => entry.queryTokens > 0).map((entry) => ({
@@ -4684,8 +5275,18 @@ function buildBenchmarkArtifact(input) {
4684
5275
  }));
4685
5276
  const avgQueryTokens = perQuestion.length ? Math.max(1, Math.round(perQuestion.reduce((total, entry) => total + entry.queryTokens, 0) / perQuestion.length)) : 0;
4686
5277
  const reductionRatio = avgQueryTokens ? Number(Math.max(0, 1 - avgQueryTokens / Math.max(1, corpusTokens)).toFixed(3)) : 0;
5278
+ const uniqueVisitedNodes = new Set(perQuestion.flatMap((entry) => entry.visitedNodeIds)).size;
5279
+ const summary = {
5280
+ questionCount: input.questions.length,
5281
+ uniqueVisitedNodes,
5282
+ finalContextTokens: avgQueryTokens,
5283
+ naiveCorpusTokens: corpusTokens,
5284
+ avgReduction: reductionRatio,
5285
+ reductionRatio
5286
+ };
4687
5287
  return {
4688
5288
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
5289
+ graphHash: graphHash(input.graph),
4689
5290
  corpusWords: input.corpusWords,
4690
5291
  corpusTokens,
4691
5292
  nodes: input.graph.nodes.length,
@@ -4693,7 +5294,9 @@ function buildBenchmarkArtifact(input) {
4693
5294
  avgQueryTokens,
4694
5295
  reductionRatio,
4695
5296
  sampleQuestions: input.questions,
4696
- perQuestion
5297
+ perQuestion,
5298
+ questionResults: perQuestion,
5299
+ summary
4697
5300
  };
4698
5301
  }
4699
5302
 
@@ -4714,10 +5317,10 @@ function conflictConfidence(claimA, claimB) {
4714
5317
  }
4715
5318
 
4716
5319
  // src/deep-lint.ts
4717
- import fs10 from "fs/promises";
4718
- import path13 from "path";
4719
- import matter3 from "gray-matter";
4720
- import { z as z4 } from "zod";
5320
+ import fs11 from "fs/promises";
5321
+ import path14 from "path";
5322
+ import matter4 from "gray-matter";
5323
+ import { z as z5 } from "zod";
4721
5324
 
4722
5325
  // src/findings.ts
4723
5326
  function normalizeFindingSeverity(value) {
@@ -4736,25 +5339,25 @@ function normalizeFindingSeverity(value) {
4736
5339
 
4737
5340
  // src/orchestration.ts
4738
5341
  import { spawn } from "child_process";
4739
- import path11 from "path";
4740
- import { z as z2 } from "zod";
4741
- var orchestrationRoleResultSchema = z2.object({
4742
- summary: z2.string().optional(),
4743
- findings: z2.array(
4744
- z2.object({
4745
- severity: z2.string().optional().default("info"),
4746
- message: z2.string().min(1),
4747
- relatedPageIds: z2.array(z2.string()).optional(),
4748
- relatedSourceIds: z2.array(z2.string()).optional(),
4749
- suggestedQuery: z2.string().optional()
5342
+ import path12 from "path";
5343
+ import { z as z3 } from "zod";
5344
+ var orchestrationRoleResultSchema = z3.object({
5345
+ summary: z3.string().optional(),
5346
+ findings: z3.array(
5347
+ z3.object({
5348
+ severity: z3.string().optional().default("info"),
5349
+ message: z3.string().min(1),
5350
+ relatedPageIds: z3.array(z3.string()).optional(),
5351
+ relatedSourceIds: z3.array(z3.string()).optional(),
5352
+ suggestedQuery: z3.string().optional()
4750
5353
  })
4751
5354
  ).default([]),
4752
- questions: z2.array(z2.string().min(1)).default([]),
4753
- proposals: z2.array(
4754
- z2.object({
4755
- path: z2.string().min(1),
4756
- content: z2.string().min(1),
4757
- reason: z2.string().min(1)
5355
+ questions: z3.array(z3.string().min(1)).default([]),
5356
+ proposals: z3.array(
5357
+ z3.object({
5358
+ path: z3.string().min(1),
5359
+ content: z3.string().min(1),
5360
+ reason: z3.string().min(1)
4758
5361
  })
4759
5362
  ).default([])
4760
5363
  });
@@ -4829,7 +5432,7 @@ async function runProviderRole(rootDir, role, roleConfig, input) {
4829
5432
  }
4830
5433
  async function runCommandRole(rootDir, role, executor, input) {
4831
5434
  const [command, ...args] = executor.command;
4832
- const cwd = executor.cwd ? path11.resolve(rootDir, executor.cwd) : rootDir;
5435
+ const cwd = executor.cwd ? path12.resolve(rootDir, executor.cwd) : rootDir;
4833
5436
  const child = spawn(command, args, {
4834
5437
  cwd,
4835
5438
  env: {
@@ -4923,9 +5526,9 @@ function summarizeRoleQuestions(results) {
4923
5526
  }
4924
5527
 
4925
5528
  // src/web-search/registry.ts
4926
- import path12 from "path";
5529
+ import path13 from "path";
4927
5530
  import { pathToFileURL } from "url";
4928
- import { z as z3 } from "zod";
5531
+ import { z as z4 } from "zod";
4929
5532
 
4930
5533
  // src/web-search/http-json.ts
4931
5534
  function deepGet(value, pathValue) {
@@ -5007,10 +5610,10 @@ var HttpJsonWebSearchAdapter = class {
5007
5610
  };
5008
5611
 
5009
5612
  // src/web-search/registry.ts
5010
- var customWebSearchModuleSchema = z3.object({
5011
- createAdapter: z3.function({
5012
- input: [z3.string(), z3.custom(), z3.string()],
5013
- output: z3.promise(z3.custom())
5613
+ var customWebSearchModuleSchema = z4.object({
5614
+ createAdapter: z4.function({
5615
+ input: [z4.string(), z4.custom(), z4.string()],
5616
+ output: z4.promise(z4.custom())
5014
5617
  })
5015
5618
  });
5016
5619
  async function createWebSearchAdapter(id, config, rootDir) {
@@ -5021,7 +5624,7 @@ async function createWebSearchAdapter(id, config, rootDir) {
5021
5624
  if (!config.module) {
5022
5625
  throw new Error(`Web search provider ${id} is type "custom" but no module path was configured.`);
5023
5626
  }
5024
- const resolvedModule = path12.isAbsolute(config.module) ? config.module : path12.resolve(rootDir, config.module);
5627
+ const resolvedModule = path13.isAbsolute(config.module) ? config.module : path13.resolve(rootDir, config.module);
5025
5628
  const loaded = await import(pathToFileURL(resolvedModule).href);
5026
5629
  const parsed = customWebSearchModuleSchema.parse(loaded);
5027
5630
  return parsed.createAdapter(id, config, rootDir);
@@ -5045,15 +5648,15 @@ async function getWebSearchAdapterForTask(rootDir, task) {
5045
5648
  }
5046
5649
 
5047
5650
  // src/deep-lint.ts
5048
- var deepLintResponseSchema = z4.object({
5049
- findings: z4.array(
5050
- z4.object({
5051
- severity: z4.string().optional().default("info"),
5052
- code: z4.enum(["coverage_gap", "contradiction_candidate", "missing_citation", "candidate_page", "follow_up_question"]),
5053
- message: z4.string().min(1),
5054
- relatedSourceIds: z4.array(z4.string()).default([]),
5055
- relatedPageIds: z4.array(z4.string()).default([]),
5056
- suggestedQuery: z4.string().optional()
5651
+ var deepLintResponseSchema = z5.object({
5652
+ findings: z5.array(
5653
+ z5.object({
5654
+ severity: z5.string().optional().default("info"),
5655
+ code: z5.enum(["coverage_gap", "contradiction_candidate", "missing_citation", "candidate_page", "follow_up_question"]),
5656
+ message: z5.string().min(1),
5657
+ relatedSourceIds: z5.array(z5.string()).default([]),
5658
+ relatedPageIds: z5.array(z5.string()).default([]),
5659
+ suggestedQuery: z5.string().optional()
5057
5660
  })
5058
5661
  ).max(20)
5059
5662
  });
@@ -5081,9 +5684,9 @@ async function loadContextPages(rootDir, graph) {
5081
5684
  );
5082
5685
  return Promise.all(
5083
5686
  contextPages.slice(0, 18).map(async (page) => {
5084
- const absolutePath = path13.join(paths.wikiDir, page.path);
5085
- const raw = await fs10.readFile(absolutePath, "utf8").catch(() => "");
5086
- const parsed = matter3(raw);
5687
+ const absolutePath = path14.join(paths.wikiDir, page.path);
5688
+ const raw = await fs11.readFile(absolutePath, "utf8").catch(() => "");
5689
+ const parsed = matter4(raw);
5087
5690
  return {
5088
5691
  id: page.id,
5089
5692
  title: page.title,
@@ -5130,7 +5733,7 @@ function heuristicDeepFindings(contextPages, structuralFindings, graph) {
5130
5733
  code: "missing_citation",
5131
5734
  message: finding.message,
5132
5735
  pagePath: finding.pagePath,
5133
- suggestedQuery: finding.pagePath ? `Which sources support the claims in ${path13.basename(finding.pagePath, ".md")}?` : void 0
5736
+ suggestedQuery: finding.pagePath ? `Which sources support the claims in ${path14.basename(finding.pagePath, ".md")}?` : void 0
5134
5737
  });
5135
5738
  }
5136
5739
  for (const page of contextPages.filter((item) => item.kind === "source").slice(0, 3)) {
@@ -5611,12 +6214,15 @@ function topGodNodes(graph, limit = 10) {
5611
6214
  }
5612
6215
 
5613
6216
  // src/markdown.ts
5614
- import matter4 from "gray-matter";
5615
- function uniqueStrings(values) {
6217
+ import matter5 from "gray-matter";
6218
+ function uniqueStrings2(values) {
5616
6219
  return uniqueBy(values.filter(Boolean), (value) => value);
5617
6220
  }
6221
+ function safeFrontmatter(value) {
6222
+ return JSON.parse(JSON.stringify(value));
6223
+ }
5618
6224
  function decoratedTags(baseTags, decorations) {
5619
- return uniqueStrings([
6225
+ return uniqueStrings2([
5620
6226
  ...baseTags,
5621
6227
  ...(decorations?.projectIds ?? []).map((projectId) => `project/${projectId}`),
5622
6228
  ...decorations?.extraTags ?? []
@@ -5695,6 +6301,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
5695
6301
  page_id: pageId,
5696
6302
  kind: "source",
5697
6303
  title: analysis.title,
6304
+ ...manifest.sourceType ? { source_type: manifest.sourceType } : {},
5698
6305
  tags: decoratedTags(analysis.code ? ["source", "code"] : ["source"], decorations),
5699
6306
  source_ids: [manifest.sourceId],
5700
6307
  project_ids: decorations?.projectIds ?? [],
@@ -5717,6 +6324,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
5717
6324
  "",
5718
6325
  `Source ID: \`${manifest.sourceId}\``,
5719
6326
  manifest.url ? `Source URL: ${manifest.url}` : `Source Path: \`${manifest.originalPath ?? manifest.storedPath}\``,
6327
+ ...manifest.sourceType ? [`Source Type: \`${manifest.sourceType}\``, ""] : [""],
5720
6328
  "",
5721
6329
  "## Summary",
5722
6330
  "",
@@ -5761,6 +6369,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
5761
6369
  path: relativePath,
5762
6370
  title: analysis.title,
5763
6371
  kind: "source",
6372
+ sourceType: manifest.sourceType,
5764
6373
  sourceIds: [manifest.sourceId],
5765
6374
  projectIds: decorations?.projectIds ?? [],
5766
6375
  nodeIds,
@@ -5778,7 +6387,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
5778
6387
  compiledFrom: metadata.compiledFrom,
5779
6388
  managedBy: metadata.managedBy
5780
6389
  },
5781
- content: matter4.stringify(body, frontmatter)
6390
+ content: matter5.stringify(body, safeFrontmatter(frontmatter))
5782
6391
  };
5783
6392
  }
5784
6393
  function buildModulePage(input) {
@@ -5793,7 +6402,7 @@ function buildModulePage(input) {
5793
6402
  const nodeIds = [code.moduleId, ...code.symbols.map((symbol) => symbol.id)];
5794
6403
  const localModuleBacklinks = input.localModules.map((moduleRef) => moduleRef.page.id);
5795
6404
  const relatedOutputs = input.relatedOutputs ?? [];
5796
- const backlinks = uniqueStrings([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]);
6405
+ const backlinks = uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]);
5797
6406
  const importsSection = code.imports.length ? code.imports.map((item) => {
5798
6407
  const localModule = item.resolvedSourceId ? input.localModules.find((moduleRef) => moduleRef.sourceId === item.resolvedSourceId && moduleRef.reExport === item.reExport) : void 0;
5799
6408
  const importedBits = [
@@ -5839,9 +6448,9 @@ function buildModulePage(input) {
5839
6448
  source_hashes: {
5840
6449
  [manifest.sourceId]: manifest.contentHash
5841
6450
  },
5842
- related_page_ids: uniqueStrings([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
6451
+ related_page_ids: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
5843
6452
  related_node_ids: [],
5844
- related_source_ids: uniqueStrings([
6453
+ related_source_ids: uniqueStrings2([
5845
6454
  manifest.sourceId,
5846
6455
  ...input.localModules.map((moduleRef) => moduleRef.sourceId),
5847
6456
  ...relatedOutputs.flatMap((page) => page.sourceIds)
@@ -5913,9 +6522,9 @@ function buildModulePage(input) {
5913
6522
  backlinks,
5914
6523
  schemaHash,
5915
6524
  sourceHashes: { [manifest.sourceId]: manifest.contentHash },
5916
- relatedPageIds: uniqueStrings([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
6525
+ relatedPageIds: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
5917
6526
  relatedNodeIds: [],
5918
- relatedSourceIds: uniqueStrings([
6527
+ relatedSourceIds: uniqueStrings2([
5919
6528
  manifest.sourceId,
5920
6529
  ...input.localModules.map((moduleRef) => moduleRef.sourceId),
5921
6530
  ...relatedOutputs.flatMap((page) => page.sourceIds)
@@ -5925,7 +6534,7 @@ function buildModulePage(input) {
5925
6534
  compiledFrom: metadata.compiledFrom,
5926
6535
  managedBy: metadata.managedBy
5927
6536
  },
5928
- content: matter4.stringify(body, frontmatter)
6537
+ content: matter5.stringify(body, frontmatter)
5929
6538
  };
5930
6539
  }
5931
6540
  function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes, schemaHash, metadata, relativePath, relatedOutputs = [], decorations) {
@@ -5996,7 +6605,7 @@ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHash
5996
6605
  compiledFrom: metadata.compiledFrom,
5997
6606
  managedBy: metadata.managedBy
5998
6607
  },
5999
- content: matter4.stringify(body, frontmatter)
6608
+ content: matter5.stringify(body, frontmatter)
6000
6609
  };
6001
6610
  }
6002
6611
  function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
@@ -6072,7 +6681,7 @@ function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
6072
6681
  }
6073
6682
  function buildSectionIndex(kind, pages, schemaHash, metadata, projectIds = []) {
6074
6683
  const title = kind.charAt(0).toUpperCase() + kind.slice(1);
6075
- return matter4.stringify(
6684
+ return matter5.stringify(
6076
6685
  [`# ${title}`, "", ...pages.map((page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`), ""].join("\n"),
6077
6686
  {
6078
6687
  page_id: `${kind}:index`,
@@ -6114,27 +6723,118 @@ function crossCommunityEdges(graph) {
6114
6723
  function suggestedGraphQuestions(graph) {
6115
6724
  const thinCommunities = (graph.communities ?? []).filter((community) => community.nodeIds.length <= 2);
6116
6725
  const bridgeNodes = graph.nodes.filter((node) => (node.bridgeScore ?? 0) > 0).sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0)).slice(0, 3);
6117
- return uniqueStrings([
6726
+ return uniqueStrings2([
6118
6727
  ...thinCommunities.map((community) => `What sources would strengthen community ${community.label}?`),
6119
6728
  ...bridgeNodes.map((node) => `Why does ${node.label} connect multiple communities in the vault?`)
6120
6729
  ]).slice(0, 6);
6121
6730
  }
6731
+ function buildGraphReportArtifact(input) {
6732
+ const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
6733
+ const godNodes = input.graph.nodes.filter((node) => node.isGodNode).sort((left, right) => (right.degree ?? 0) - (left.degree ?? 0)).slice(0, 8);
6734
+ const bridgeNodes = input.graph.nodes.filter((node) => (node.bridgeScore ?? 0) > 0).sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0)).slice(0, 8);
6735
+ const thinCommunities = (input.graph.communities ?? []).filter((community) => community.nodeIds.length <= 2).map((community) => {
6736
+ const page = input.communityPages.find((candidate) => candidate.id === `graph:${community.id}`);
6737
+ return {
6738
+ id: community.id,
6739
+ label: community.label,
6740
+ nodeCount: community.nodeIds.length,
6741
+ pageId: page?.id,
6742
+ path: page?.path,
6743
+ title: page?.title
6744
+ };
6745
+ });
6746
+ const surprisingConnections = crossCommunityEdges(input.graph).slice(0, 8).map((edge) => {
6747
+ const source = nodesById.get(edge.source);
6748
+ const target = nodesById.get(edge.target);
6749
+ const path23 = shortestGraphPath(input.graph, edge.source, edge.target);
6750
+ const sourceCommunity = source?.communityId ? input.graph.communities?.find((community) => community.id === source.communityId) : void 0;
6751
+ const targetCommunity = target?.communityId ? input.graph.communities?.find((community) => community.id === target.communityId) : void 0;
6752
+ return {
6753
+ id: edge.id,
6754
+ sourceNodeId: edge.source,
6755
+ sourceLabel: source?.label ?? edge.source,
6756
+ targetNodeId: edge.target,
6757
+ targetLabel: target?.label ?? edge.target,
6758
+ relation: edge.relation,
6759
+ evidenceClass: edge.evidenceClass,
6760
+ confidence: edge.confidence,
6761
+ pathNodeIds: path23.nodeIds,
6762
+ pathEdgeIds: path23.edgeIds,
6763
+ pathSummary: path23.summary,
6764
+ explanation: normalizeWhitespace(
6765
+ [
6766
+ `${source?.label ?? edge.source} links ${sourceCommunity?.label ? `from ${sourceCommunity.label}` : ""}`.trim(),
6767
+ `to ${target?.label ?? edge.target}${targetCommunity?.label ? ` in ${targetCommunity.label}` : ""}`.trim(),
6768
+ `through ${edge.relation} with ${edge.evidenceClass} evidence at ${edge.confidence.toFixed(2)} confidence.`
6769
+ ].join(" ")
6770
+ )
6771
+ };
6772
+ });
6773
+ return {
6774
+ generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
6775
+ graphHash: input.graphHash,
6776
+ overview: {
6777
+ nodes: input.graph.nodes.length,
6778
+ edges: input.graph.edges.length,
6779
+ pages: input.graph.pages.length,
6780
+ communities: input.graph.communities?.length ?? 0
6781
+ },
6782
+ benchmark: input.benchmark ? {
6783
+ generatedAt: input.benchmark.generatedAt,
6784
+ stale: input.benchmarkStale ?? false,
6785
+ summary: input.benchmark.summary,
6786
+ questionCount: input.benchmark.sampleQuestions.length
6787
+ } : void 0,
6788
+ godNodes: godNodes.map((node) => ({
6789
+ nodeId: node.id,
6790
+ label: node.label,
6791
+ pageId: node.pageId,
6792
+ degree: node.degree,
6793
+ bridgeScore: node.bridgeScore
6794
+ })),
6795
+ bridgeNodes: bridgeNodes.map((node) => ({
6796
+ nodeId: node.id,
6797
+ label: node.label,
6798
+ pageId: node.pageId,
6799
+ degree: node.degree,
6800
+ bridgeScore: node.bridgeScore
6801
+ })),
6802
+ thinCommunities,
6803
+ surprisingConnections,
6804
+ suggestedQuestions: suggestedGraphQuestions(input.graph),
6805
+ communityPages: input.communityPages.map((page) => ({
6806
+ id: page.id,
6807
+ path: page.path,
6808
+ title: page.title
6809
+ })),
6810
+ recentResearchSources: (input.recentResearchSources ?? []).map((page) => ({
6811
+ pageId: page.id,
6812
+ path: page.path,
6813
+ title: page.title,
6814
+ sourceType: page.sourceType,
6815
+ updatedAt: page.updatedAt
6816
+ }))
6817
+ };
6818
+ }
6122
6819
  function buildGraphReportPage(input) {
6123
6820
  const pageId = "graph:report";
6124
6821
  const pathValue = pagePathFor("graph_report", "report");
6125
6822
  const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
6126
6823
  const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
6127
- const godNodes = input.graph.nodes.filter((node) => node.isGodNode).sort((left, right) => (right.degree ?? 0) - (left.degree ?? 0)).slice(0, 8);
6128
- const bridgeNodes = input.graph.nodes.filter((node) => (node.bridgeScore ?? 0) > 0).sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0)).slice(0, 8);
6129
- const surprisingEdges = crossCommunityEdges(input.graph).slice(0, 8);
6130
- const thinCommunities = (input.graph.communities ?? []).filter((community) => community.nodeIds.length <= 2);
6131
- const relatedNodeIds = uniqueStrings([...godNodes, ...bridgeNodes].map((node) => node.id));
6132
- const relatedPageIds = uniqueStrings([
6133
- ...godNodes.map((node) => node.pageId ?? ""),
6134
- ...bridgeNodes.map((node) => node.pageId ?? ""),
6135
- ...input.communityPages.map((page) => page.id)
6824
+ const relatedNodeIds = uniqueStrings2([
6825
+ ...input.report.godNodes.map((node) => node.nodeId),
6826
+ ...input.report.bridgeNodes.map((node) => node.nodeId)
6827
+ ]);
6828
+ const relatedPageIds = uniqueStrings2([
6829
+ ...input.report.godNodes.map((node) => node.pageId ?? ""),
6830
+ ...input.report.bridgeNodes.map((node) => node.pageId ?? ""),
6831
+ ...input.report.communityPages.map((page) => page.id),
6832
+ ...input.report.recentResearchSources.map((page) => page.pageId)
6833
+ ]);
6834
+ const relatedSourceIds = uniqueStrings2([
6835
+ ...relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []),
6836
+ ...input.report.recentResearchSources.flatMap((page) => pagesById.get(page.pageId)?.sourceIds ?? [])
6136
6837
  ]);
6137
- const relatedSourceIds = uniqueStrings(relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []));
6138
6838
  const frontmatter = {
6139
6839
  page_id: pageId,
6140
6840
  kind: "graph_report",
@@ -6162,47 +6862,66 @@ function buildGraphReportPage(input) {
6162
6862
  "",
6163
6863
  "## Overview",
6164
6864
  "",
6165
- `- Nodes: ${input.graph.nodes.length}`,
6166
- `- Edges: ${input.graph.edges.length}`,
6167
- `- Pages: ${input.graph.pages.length}`,
6168
- `- Communities: ${input.graph.communities?.length ?? 0}`,
6865
+ `- Nodes: ${input.report.overview.nodes}`,
6866
+ `- Edges: ${input.report.overview.edges}`,
6867
+ `- Pages: ${input.report.overview.pages}`,
6868
+ `- Communities: ${input.report.overview.communities}`,
6169
6869
  "",
6170
- ...input.benchmark ? [
6171
- "## Benchmark",
6172
- "",
6173
- `- Corpus Tokens: ${input.benchmark.corpusTokens}`,
6174
- `- Avg Query Tokens: ${input.benchmark.avgQueryTokens}`,
6175
- `- Reduction Ratio: ${(input.benchmark.reductionRatio * 100).toFixed(1)}%`,
6176
- `- Sample Questions: ${input.benchmark.sampleQuestions.length}`,
6870
+ "## Benchmark Summary",
6871
+ "",
6872
+ ...input.report.benchmark ? [
6873
+ `- Generated At: ${input.report.benchmark.generatedAt}`,
6874
+ `- Status: ${input.report.benchmark.stale ? "Stale (graph changed since benchmark ran)" : "Fresh"}`,
6875
+ `- Naive Corpus Tokens: ${input.report.benchmark.summary.naiveCorpusTokens}`,
6876
+ `- Final Context Tokens: ${input.report.benchmark.summary.finalContextTokens}`,
6877
+ `- Unique Nodes Considered: ${input.report.benchmark.summary.uniqueVisitedNodes}`,
6878
+ `- Reduction Ratio: ${(input.report.benchmark.summary.reductionRatio * 100).toFixed(1)}%`,
6879
+ `- Questions: ${input.report.benchmark.questionCount}`,
6177
6880
  ""
6178
- ] : [],
6179
- "## God Nodes",
6881
+ ] : ["- No benchmark results yet.", ""],
6882
+ "## Top God Nodes",
6180
6883
  "",
6181
- ...godNodes.length ? godNodes.map((node) => `- ${graphNodeLink(node, pagesById)} (${nodeSummary(node)})`) : ["- No high-connectivity nodes detected."],
6884
+ ...input.report.godNodes.length ? input.report.godNodes.map((node) => {
6885
+ const graphNode = nodesById.get(node.nodeId);
6886
+ return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
6887
+ }) : ["- No high-connectivity nodes detected."],
6182
6888
  "",
6183
- "## Bridge Nodes",
6889
+ "## Top Bridge Nodes",
6184
6890
  "",
6185
- ...bridgeNodes.length ? bridgeNodes.map((node) => `- ${graphNodeLink(node, pagesById)} (${nodeSummary(node)})`) : ["- No cross-community bridge nodes detected."],
6891
+ ...input.report.bridgeNodes.length ? input.report.bridgeNodes.map((node) => {
6892
+ const graphNode = nodesById.get(node.nodeId);
6893
+ return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
6894
+ }) : ["- No cross-community bridge nodes detected."],
6186
6895
  "",
6187
6896
  "## Communities",
6188
6897
  "",
6189
- ...input.communityPages.length ? input.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
6898
+ ...input.report.communityPages.length ? input.report.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
6190
6899
  "",
6191
- "## Thin Communities",
6900
+ "## Thin Or Underlinked Areas",
6192
6901
  "",
6193
- ...thinCommunities.length ? thinCommunities.map((community) => `- ${community.label} (${community.nodeIds.length} node(s))`) : ["- No thin communities detected."],
6902
+ ...input.report.thinCommunities.length ? input.report.thinCommunities.map(
6903
+ (community) => community.path ? `- [[${community.path.replace(/\.md$/, "")}|${community.title ?? community.label}]] (${community.nodeCount} node(s))` : `- ${community.label} (${community.nodeCount} node(s))`
6904
+ ) : ["- No thin communities detected."],
6194
6905
  "",
6195
- "## Cross-Community Surprises",
6906
+ "## Surprising Connections",
6196
6907
  "",
6197
- ...surprisingEdges.length ? surprisingEdges.map((edge) => {
6198
- const source = nodesById.get(edge.source);
6199
- const target = nodesById.get(edge.target);
6200
- return `- ${source ? graphNodeLink(source, pagesById) : `\`${edge.source}\``} ${edge.relation} ${target ? graphNodeLink(target, pagesById) : `\`${edge.target}\``} (${edge.evidenceClass}, ${edge.confidence.toFixed(2)})`;
6908
+ ...input.report.surprisingConnections.length ? input.report.surprisingConnections.map((connection) => {
6909
+ const source = nodesById.get(connection.sourceNodeId);
6910
+ const target = nodesById.get(connection.targetNodeId);
6911
+ const sourceLabel = source ? graphNodeLink(source, pagesById) : `\`${connection.sourceNodeId}\``;
6912
+ const targetLabel = target ? graphNodeLink(target, pagesById) : `\`${connection.targetNodeId}\``;
6913
+ return `- ${sourceLabel} ${connection.relation} ${targetLabel} (${connection.evidenceClass}, ${connection.confidence.toFixed(2)}). ${connection.explanation} Path: ${connection.pathSummary}.`;
6201
6914
  }) : ["- No cross-community links detected."],
6202
6915
  "",
6203
- "## Suggested Follow-Up Questions",
6916
+ "## New Research Sources",
6917
+ "",
6918
+ ...input.report.recentResearchSources.length ? input.report.recentResearchSources.map(
6919
+ (page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]] (\`${page.sourceType}\`, updated ${page.updatedAt})`
6920
+ ) : ["- No newly captured research sources since the previous compile."],
6921
+ "",
6922
+ "## Suggested Questions",
6204
6923
  "",
6205
- ...suggestedGraphQuestions(input.graph).map((question) => `- ${question}`),
6924
+ ...input.report.suggestedQuestions.map((question) => `- ${question}`),
6206
6925
  ""
6207
6926
  ].join("\n");
6208
6927
  return {
@@ -6228,7 +6947,7 @@ function buildGraphReportPage(input) {
6228
6947
  compiledFrom: input.metadata.compiledFrom,
6229
6948
  managedBy: input.metadata.managedBy
6230
6949
  },
6231
- content: matter4.stringify(body, frontmatter)
6950
+ content: matter5.stringify(body, frontmatter)
6232
6951
  };
6233
6952
  }
6234
6953
  function buildCommunitySummaryPage(input) {
@@ -6237,14 +6956,14 @@ function buildCommunitySummaryPage(input) {
6237
6956
  const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
6238
6957
  const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
6239
6958
  const communityNodes = input.community.nodeIds.map((nodeId) => nodesById.get(nodeId)).filter((node) => Boolean(node));
6240
- const communityPageIds = uniqueStrings(communityNodes.map((node) => node.pageId ?? ""));
6959
+ const communityPageIds = uniqueStrings2(communityNodes.map((node) => node.pageId ?? ""));
6241
6960
  const communityPages = communityPageIds.map((id) => pagesById.get(id)).filter((page) => Boolean(page));
6242
6961
  const externalEdges = input.graph.edges.filter((edge) => {
6243
6962
  const source = nodesById.get(edge.source);
6244
6963
  const target = nodesById.get(edge.target);
6245
6964
  return source?.communityId === input.community.id && target?.communityId && target.communityId !== input.community.id;
6246
6965
  }).slice(0, 8);
6247
- const relatedSourceIds = uniqueStrings(communityNodes.flatMap((node) => node.sourceIds));
6966
+ const relatedSourceIds = uniqueStrings2(communityNodes.flatMap((node) => node.sourceIds));
6248
6967
  const frontmatter = {
6249
6968
  page_id: pageId,
6250
6969
  kind: "community_summary",
@@ -6263,7 +6982,7 @@ function buildCommunitySummaryPage(input) {
6263
6982
  backlinks: ["graph:report"],
6264
6983
  schema_hash: input.schemaHash,
6265
6984
  source_hashes: {},
6266
- related_page_ids: uniqueStrings(["graph:report", ...communityPageIds]),
6985
+ related_page_ids: uniqueStrings2(["graph:report", ...communityPageIds]),
6267
6986
  related_node_ids: input.community.nodeIds,
6268
6987
  related_source_ids: relatedSourceIds
6269
6988
  };
@@ -6302,7 +7021,7 @@ function buildCommunitySummaryPage(input) {
6302
7021
  backlinks: ["graph:report"],
6303
7022
  schemaHash: input.schemaHash,
6304
7023
  sourceHashes: {},
6305
- relatedPageIds: uniqueStrings(["graph:report", ...communityPageIds]),
7024
+ relatedPageIds: uniqueStrings2(["graph:report", ...communityPageIds]),
6306
7025
  relatedNodeIds: input.community.nodeIds,
6307
7026
  relatedSourceIds,
6308
7027
  createdAt: input.metadata.createdAt,
@@ -6310,11 +7029,11 @@ function buildCommunitySummaryPage(input) {
6310
7029
  compiledFrom: input.metadata.compiledFrom,
6311
7030
  managedBy: input.metadata.managedBy
6312
7031
  },
6313
- content: matter4.stringify(body, frontmatter)
7032
+ content: matter5.stringify(body, frontmatter)
6314
7033
  };
6315
7034
  }
6316
7035
  function buildProjectsIndex(projectPages, schemaHash, metadata) {
6317
- return matter4.stringify(
7036
+ return matter5.stringify(
6318
7037
  [
6319
7038
  "# Projects",
6320
7039
  "",
@@ -6344,7 +7063,7 @@ function buildProjectsIndex(projectPages, schemaHash, metadata) {
6344
7063
  }
6345
7064
  function buildProjectIndex(input) {
6346
7065
  const title = `Project: ${input.projectId}`;
6347
- return matter4.stringify(
7066
+ return matter5.stringify(
6348
7067
  [
6349
7068
  `# ${title}`,
6350
7069
  "",
@@ -6457,7 +7176,7 @@ function buildOutputPage(input) {
6457
7176
  outputFormat: input.outputFormat,
6458
7177
  outputAssets
6459
7178
  },
6460
- content: matter4.stringify(
7179
+ content: matter5.stringify(
6461
7180
  (input.outputFormat === "slides" ? [
6462
7181
  input.answer,
6463
7182
  "",
@@ -6583,7 +7302,7 @@ function buildExploreHubPage(input) {
6583
7302
  outputFormat: input.outputFormat,
6584
7303
  outputAssets
6585
7304
  },
6586
- content: matter4.stringify(
7305
+ content: matter5.stringify(
6587
7306
  (input.outputFormat === "slides" ? [
6588
7307
  `# ${title}`,
6589
7308
  "",
@@ -6653,49 +7372,49 @@ function buildExploreHubPage(input) {
6653
7372
  }
6654
7373
 
6655
7374
  // src/output-artifacts.ts
6656
- import { z as z5 } from "zod";
7375
+ import { z as z6 } from "zod";
6657
7376
  function escapeXml(value) {
6658
7377
  return value.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
6659
7378
  }
6660
7379
  function clampNumber(value, min, max) {
6661
7380
  return Math.min(max, Math.max(min, value));
6662
7381
  }
6663
- var chartSpecSchema = z5.object({
6664
- kind: z5.enum(["bar", "line"]).default("bar"),
6665
- title: z5.string().min(1),
6666
- subtitle: z5.string().optional(),
6667
- xLabel: z5.string().optional(),
6668
- yLabel: z5.string().optional(),
6669
- seriesLabel: z5.string().optional(),
6670
- data: z5.array(
6671
- z5.object({
6672
- label: z5.string().min(1),
6673
- value: z5.number().finite()
7382
+ var chartSpecSchema = z6.object({
7383
+ kind: z6.enum(["bar", "line"]).default("bar"),
7384
+ title: z6.string().min(1),
7385
+ subtitle: z6.string().optional(),
7386
+ xLabel: z6.string().optional(),
7387
+ yLabel: z6.string().optional(),
7388
+ seriesLabel: z6.string().optional(),
7389
+ data: z6.array(
7390
+ z6.object({
7391
+ label: z6.string().min(1),
7392
+ value: z6.number().finite()
6674
7393
  })
6675
7394
  ).min(2).max(12),
6676
- notes: z5.array(z5.string().min(1)).max(5).optional()
7395
+ notes: z6.array(z6.string().min(1)).max(5).optional()
6677
7396
  });
6678
- var sceneSpecSchema = z5.object({
6679
- title: z5.string().min(1),
6680
- alt: z5.string().min(1),
6681
- background: z5.string().optional(),
6682
- width: z5.number().int().positive().max(2400).optional(),
6683
- height: z5.number().int().positive().max(2400).optional(),
6684
- elements: z5.array(
6685
- z5.object({
6686
- kind: z5.enum(["shape", "label"]),
6687
- shape: z5.enum(["rect", "circle", "line"]).optional(),
6688
- x: z5.number().finite(),
6689
- y: z5.number().finite(),
6690
- width: z5.number().finite().optional(),
6691
- height: z5.number().finite().optional(),
6692
- radius: z5.number().finite().optional(),
6693
- text: z5.string().optional(),
6694
- fontSize: z5.number().finite().optional(),
6695
- fill: z5.string().optional(),
6696
- stroke: z5.string().optional(),
6697
- strokeWidth: z5.number().finite().optional(),
6698
- opacity: z5.number().finite().optional()
7397
+ var sceneSpecSchema = z6.object({
7398
+ title: z6.string().min(1),
7399
+ alt: z6.string().min(1),
7400
+ background: z6.string().optional(),
7401
+ width: z6.number().int().positive().max(2400).optional(),
7402
+ height: z6.number().int().positive().max(2400).optional(),
7403
+ elements: z6.array(
7404
+ z6.object({
7405
+ kind: z6.enum(["shape", "label"]),
7406
+ shape: z6.enum(["rect", "circle", "line"]).optional(),
7407
+ x: z6.number().finite(),
7408
+ y: z6.number().finite(),
7409
+ width: z6.number().finite().optional(),
7410
+ height: z6.number().finite().optional(),
7411
+ radius: z6.number().finite().optional(),
7412
+ text: z6.string().optional(),
7413
+ fontSize: z6.number().finite().optional(),
7414
+ fill: z6.string().optional(),
7415
+ stroke: z6.string().optional(),
7416
+ strokeWidth: z6.number().finite().optional(),
7417
+ opacity: z6.number().finite().optional()
6699
7418
  })
6700
7419
  ).min(1).max(32)
6701
7420
  });
@@ -6847,14 +7566,14 @@ function buildOutputAssetManifest(input) {
6847
7566
  }
6848
7567
 
6849
7568
  // src/outputs.ts
7569
+ import fs13 from "fs/promises";
7570
+ import path16 from "path";
7571
+ import matter7 from "gray-matter";
7572
+
7573
+ // src/pages.ts
6850
7574
  import fs12 from "fs/promises";
6851
7575
  import path15 from "path";
6852
7576
  import matter6 from "gray-matter";
6853
-
6854
- // src/pages.ts
6855
- import fs11 from "fs/promises";
6856
- import path14 from "path";
6857
- import matter5 from "gray-matter";
6858
7577
  function normalizeStringArray(value) {
6859
7578
  return Array.isArray(value) ? value.filter((item) => typeof item === "string") : [];
6860
7579
  }
@@ -6875,6 +7594,9 @@ function normalizePageStatus(value, fallback = "active") {
6875
7594
  function normalizePageManager(value, fallback = "system") {
6876
7595
  return value === "human" || value === "system" ? value : fallback;
6877
7596
  }
7597
+ function normalizeSourceType(value) {
7598
+ return value === "arxiv" || value === "doi" || value === "tweet" || value === "article" || value === "url" ? value : void 0;
7599
+ }
6878
7600
  function normalizeOutputFormat(value, fallback = "markdown") {
6879
7601
  return value === "report" || value === "slides" || value === "chart" || value === "image" ? value : fallback;
6880
7602
  }
@@ -6925,8 +7647,8 @@ async function loadExistingManagedPageState(absolutePath, defaults = {}) {
6925
7647
  updatedAt: updatedFallback
6926
7648
  };
6927
7649
  }
6928
- const content = await fs11.readFile(absolutePath, "utf8");
6929
- const parsed = matter5(content);
7650
+ const content = await fs12.readFile(absolutePath, "utf8");
7651
+ const parsed = matter6(content);
6930
7652
  return {
6931
7653
  status: normalizePageStatus(parsed.data.status, defaults.status ?? "active"),
6932
7654
  managedBy: normalizePageManager(parsed.data.managed_by, defaults.managedBy ?? "system"),
@@ -6960,11 +7682,11 @@ function inferPageKind(relativePath, explicitKind = void 0) {
6960
7682
  return "index";
6961
7683
  }
6962
7684
  function parseStoredPage(relativePath, content, defaults = {}) {
6963
- const parsed = matter5(content);
7685
+ const parsed = matter6(content);
6964
7686
  const now = (/* @__PURE__ */ new Date()).toISOString();
6965
7687
  const fallbackCreatedAt = defaults.createdAt ?? now;
6966
7688
  const fallbackUpdatedAt = defaults.updatedAt ?? fallbackCreatedAt;
6967
- const title = typeof parsed.data.title === "string" ? parsed.data.title : path14.basename(relativePath, ".md");
7689
+ const title = typeof parsed.data.title === "string" ? parsed.data.title : path15.basename(relativePath, ".md");
6968
7690
  const kind = inferPageKind(relativePath, parsed.data.kind);
6969
7691
  const sourceIds = normalizeStringArray(parsed.data.source_ids);
6970
7692
  const projectIds = normalizeProjectIds(parsed.data.project_ids);
@@ -6980,6 +7702,7 @@ function parseStoredPage(relativePath, content, defaults = {}) {
6980
7702
  path: relativePath,
6981
7703
  title,
6982
7704
  kind,
7705
+ sourceType: normalizeSourceType(parsed.data.source_type),
6983
7706
  sourceIds,
6984
7707
  projectIds,
6985
7708
  nodeIds,
@@ -7003,18 +7726,18 @@ function parseStoredPage(relativePath, content, defaults = {}) {
7003
7726
  };
7004
7727
  }
7005
7728
  async function loadInsightPages(wikiDir) {
7006
- const insightsDir = path14.join(wikiDir, "insights");
7729
+ const insightsDir = path15.join(wikiDir, "insights");
7007
7730
  if (!await fileExists(insightsDir)) {
7008
7731
  return [];
7009
7732
  }
7010
- const files = (await listFilesRecursive(insightsDir)).filter((filePath) => filePath.endsWith(".md")).filter((filePath) => path14.basename(filePath) !== "index.md").sort((left, right) => left.localeCompare(right));
7733
+ const files = (await listFilesRecursive(insightsDir)).filter((filePath) => filePath.endsWith(".md")).filter((filePath) => path15.basename(filePath) !== "index.md").sort((left, right) => left.localeCompare(right));
7011
7734
  const insights = [];
7012
7735
  for (const absolutePath of files) {
7013
- const relativePath = toPosix(path14.relative(wikiDir, absolutePath));
7014
- const content = await fs11.readFile(absolutePath, "utf8");
7015
- const parsed = matter5(content);
7016
- const stats = await fs11.stat(absolutePath);
7017
- const title = typeof parsed.data.title === "string" ? parsed.data.title : path14.basename(absolutePath, ".md");
7736
+ const relativePath = toPosix(path15.relative(wikiDir, absolutePath));
7737
+ const content = await fs12.readFile(absolutePath, "utf8");
7738
+ const parsed = matter6(content);
7739
+ const stats = await fs12.stat(absolutePath);
7740
+ const title = typeof parsed.data.title === "string" ? parsed.data.title : path15.basename(absolutePath, ".md");
7018
7741
  const sourceIds = normalizeStringArray(parsed.data.source_ids);
7019
7742
  const projectIds = normalizeProjectIds(parsed.data.project_ids);
7020
7743
  const nodeIds = normalizeStringArray(parsed.data.node_ids);
@@ -7076,28 +7799,28 @@ function relatedOutputsForPage(targetPage, outputPages) {
7076
7799
  return outputPages.map((page) => ({ page, rank: relationRank(page, targetPage) })).filter((item) => item.rank > 0).sort((left, right) => right.rank - left.rank || left.page.title.localeCompare(right.page.title)).map((item) => item.page);
7077
7800
  }
7078
7801
  async function resolveUniqueOutputSlug(wikiDir, baseSlug) {
7079
- const outputsDir = path15.join(wikiDir, "outputs");
7802
+ const outputsDir = path16.join(wikiDir, "outputs");
7080
7803
  const root = baseSlug || "output";
7081
7804
  let candidate = root;
7082
7805
  let counter = 2;
7083
- while (await fileExists(path15.join(outputsDir, `${candidate}.md`))) {
7806
+ while (await fileExists(path16.join(outputsDir, `${candidate}.md`))) {
7084
7807
  candidate = `${root}-${counter}`;
7085
7808
  counter++;
7086
7809
  }
7087
7810
  return candidate;
7088
7811
  }
7089
7812
  async function loadSavedOutputPages(wikiDir) {
7090
- const outputsDir = path15.join(wikiDir, "outputs");
7091
- const entries = await fs12.readdir(outputsDir, { withFileTypes: true }).catch(() => []);
7813
+ const outputsDir = path16.join(wikiDir, "outputs");
7814
+ const entries = await fs13.readdir(outputsDir, { withFileTypes: true }).catch(() => []);
7092
7815
  const outputs = [];
7093
7816
  for (const entry of entries) {
7094
7817
  if (!entry.isFile() || !entry.name.endsWith(".md") || entry.name === "index.md") {
7095
7818
  continue;
7096
7819
  }
7097
- const relativePath = path15.posix.join("outputs", entry.name);
7098
- const absolutePath = path15.join(outputsDir, entry.name);
7099
- const content = await fs12.readFile(absolutePath, "utf8");
7100
- const parsed = matter6(content);
7820
+ const relativePath = path16.posix.join("outputs", entry.name);
7821
+ const absolutePath = path16.join(outputsDir, entry.name);
7822
+ const content = await fs13.readFile(absolutePath, "utf8");
7823
+ const parsed = matter7(content);
7101
7824
  const slug = entry.name.replace(/\.md$/, "");
7102
7825
  const title = typeof parsed.data.title === "string" ? parsed.data.title : slug;
7103
7826
  const pageId = typeof parsed.data.page_id === "string" ? parsed.data.page_id : `output:${slug}`;
@@ -7109,7 +7832,7 @@ async function loadSavedOutputPages(wikiDir) {
7109
7832
  const relatedSourceIds = normalizeStringArray(parsed.data.related_source_ids);
7110
7833
  const backlinks = normalizeStringArray(parsed.data.backlinks);
7111
7834
  const compiledFrom = normalizeStringArray(parsed.data.compiled_from);
7112
- const stats = await fs12.stat(absolutePath);
7835
+ const stats = await fs13.stat(absolutePath);
7113
7836
  const createdAt = typeof parsed.data.created_at === "string" ? parsed.data.created_at : stats.birthtimeMs > 0 ? stats.birthtime.toISOString() : stats.mtime.toISOString();
7114
7837
  const updatedAt = typeof parsed.data.updated_at === "string" ? parsed.data.updated_at : stats.mtime.toISOString();
7115
7838
  outputs.push({
@@ -7147,9 +7870,9 @@ async function loadSavedOutputPages(wikiDir) {
7147
7870
  }
7148
7871
 
7149
7872
  // src/search.ts
7150
- import fs13 from "fs/promises";
7151
- import path16 from "path";
7152
- import matter7 from "gray-matter";
7873
+ import fs14 from "fs/promises";
7874
+ import path17 from "path";
7875
+ import matter8 from "gray-matter";
7153
7876
  function getDatabaseSync() {
7154
7877
  const builtin = process.getBuiltinModule?.("node:sqlite");
7155
7878
  if (!builtin?.DatabaseSync) {
@@ -7167,8 +7890,11 @@ function normalizeKind(value) {
7167
7890
  function normalizeStatus(value) {
7168
7891
  return value === "draft" || value === "candidate" || value === "active" || value === "archived" ? value : void 0;
7169
7892
  }
7893
+ function normalizeSourceType2(value) {
7894
+ return value === "arxiv" || value === "doi" || value === "tweet" || value === "article" || value === "url" ? value : void 0;
7895
+ }
7170
7896
  async function rebuildSearchIndex(dbPath, pages, wikiDir) {
7171
- await ensureDir(path16.dirname(dbPath));
7897
+ await ensureDir(path17.dirname(dbPath));
7172
7898
  const DatabaseSync = getDatabaseSync();
7173
7899
  const db = new DatabaseSync(dbPath);
7174
7900
  db.exec("PRAGMA journal_mode = WAL;");
@@ -7182,6 +7908,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
7182
7908
  body TEXT NOT NULL,
7183
7909
  kind TEXT NOT NULL,
7184
7910
  status TEXT NOT NULL,
7911
+ source_type TEXT NOT NULL,
7185
7912
  project_ids TEXT NOT NULL,
7186
7913
  project_key TEXT NOT NULL
7187
7914
  );
@@ -7195,12 +7922,12 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
7195
7922
  DELETE FROM pages;
7196
7923
  `);
7197
7924
  const insertPage = db.prepare(
7198
- "INSERT INTO pages (id, path, title, body, kind, status, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
7925
+ "INSERT INTO pages (id, path, title, body, kind, status, source_type, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
7199
7926
  );
7200
7927
  for (const page of pages) {
7201
- const absolutePath = path16.join(wikiDir, page.path);
7202
- const content = await fs13.readFile(absolutePath, "utf8");
7203
- const parsed = matter7(content);
7928
+ const absolutePath = path17.join(wikiDir, page.path);
7929
+ const content = await fs14.readFile(absolutePath, "utf8");
7930
+ const parsed = matter8(content);
7204
7931
  insertPage.run(
7205
7932
  page.id,
7206
7933
  page.path,
@@ -7208,6 +7935,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
7208
7935
  parsed.content,
7209
7936
  page.kind,
7210
7937
  page.status,
7938
+ typeof parsed.data.source_type === "string" ? parsed.data.source_type : "",
7211
7939
  JSON.stringify(page.projectIds),
7212
7940
  page.projectIds.map((projectId) => `|${projectId}|`).join("")
7213
7941
  );
@@ -7241,6 +7969,10 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
7241
7969
  params.push(`%|${options.project}|%`);
7242
7970
  }
7243
7971
  }
7972
+ if (options.sourceType && options.sourceType !== "all") {
7973
+ clauses.push("pages.source_type = ?");
7974
+ params.push(options.sourceType);
7975
+ }
7244
7976
  const statement = db.prepare(`
7245
7977
  SELECT
7246
7978
  pages.id AS pageId,
@@ -7248,6 +7980,7 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
7248
7980
  pages.title AS title,
7249
7981
  pages.kind AS kind,
7250
7982
  pages.status AS status,
7983
+ pages.source_type AS sourceType,
7251
7984
  pages.project_ids AS projectIds,
7252
7985
  snippet(page_search, 1, '[', ']', '...', 16) AS snippet,
7253
7986
  bm25(page_search) AS rank
@@ -7275,13 +8008,14 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
7275
8008
  title: String(row.title ?? ""),
7276
8009
  kind: normalizeKind(row.kind),
7277
8010
  status: normalizeStatus(row.status),
8011
+ sourceType: normalizeSourceType2(row.sourceType),
7278
8012
  snippet: String(row.snippet ?? ""),
7279
8013
  rank: Number(row.rank ?? 0)
7280
8014
  }));
7281
8015
  }
7282
8016
 
7283
8017
  // src/vault.ts
7284
- function uniqueStrings2(values) {
8018
+ function uniqueStrings3(values) {
7285
8019
  return uniqueBy(values.filter(Boolean), (value) => value);
7286
8020
  }
7287
8021
  function normalizeOutputFormat2(format) {
@@ -7302,7 +8036,7 @@ function outputFormatInstruction(format) {
7302
8036
  }
7303
8037
  }
7304
8038
  function outputAssetPath(slug, fileName) {
7305
- return toPosix(path17.join("outputs", "assets", slug, fileName));
8039
+ return toPosix(path18.join("outputs", "assets", slug, fileName));
7306
8040
  }
7307
8041
  function outputAssetId(slug, role) {
7308
8042
  return `output:${slug}:asset:${role}`;
@@ -7442,7 +8176,7 @@ async function resolveImageGenerationProvider(rootDir) {
7442
8176
  if (!providerConfig) {
7443
8177
  throw new Error(`No provider configured with id "${preferredProviderId}" for task "imageProvider".`);
7444
8178
  }
7445
- const { createProvider: createProvider2 } = await import("./registry-X5PMZTZY.js");
8179
+ const { createProvider: createProvider2 } = await import("./registry-6KZMA3XM.js");
7446
8180
  return createProvider2(preferredProviderId, providerConfig, rootDir);
7447
8181
  }
7448
8182
  async function generateOutputArtifacts(rootDir, input) {
@@ -7640,13 +8374,13 @@ async function generateOutputArtifacts(rootDir, input) {
7640
8374
  };
7641
8375
  }
7642
8376
  function normalizeProjectRoot(root) {
7643
- const normalized = toPosix(path17.posix.normalize(root.replace(/\\/g, "/"))).replace(/^\.\/+/, "").replace(/\/+$/, "");
8377
+ const normalized = toPosix(path18.posix.normalize(root.replace(/\\/g, "/"))).replace(/^\.\/+/, "").replace(/\/+$/, "");
7644
8378
  return normalized;
7645
8379
  }
7646
8380
  function projectEntries(config) {
7647
8381
  return Object.entries(config.projects ?? {}).map(([id, project]) => ({
7648
8382
  id,
7649
- roots: uniqueStrings2(project.roots.map(normalizeProjectRoot)).filter(Boolean),
8383
+ roots: uniqueStrings3(project.roots.map(normalizeProjectRoot)).filter(Boolean),
7650
8384
  schemaPath: project.schemaPath
7651
8385
  })).sort((left, right) => left.id.localeCompare(right.id));
7652
8386
  }
@@ -7666,10 +8400,10 @@ function manifestPathForProject(rootDir, manifest) {
7666
8400
  if (!rawPath) {
7667
8401
  return toPosix(manifest.storedPath);
7668
8402
  }
7669
- if (!path17.isAbsolute(rawPath)) {
8403
+ if (!path18.isAbsolute(rawPath)) {
7670
8404
  return normalizeProjectRoot(rawPath);
7671
8405
  }
7672
- const relative = toPosix(path17.relative(rootDir, rawPath));
8406
+ const relative = toPosix(path18.relative(rootDir, rawPath));
7673
8407
  return relative.startsWith("..") ? toPosix(rawPath) : normalizeProjectRoot(relative);
7674
8408
  }
7675
8409
  function prefixMatches(value, prefix) {
@@ -7694,11 +8428,11 @@ function resolveSourceProjects(rootDir, manifests, config) {
7694
8428
  return Object.fromEntries(manifests.map((manifest) => [manifest.sourceId, resolveSourceProjectId(rootDir, manifest, config)]));
7695
8429
  }
7696
8430
  function scopedProjectIdsFromSources(sourceIds, sourceProjects) {
7697
- const projectIds = uniqueStrings2(sourceIds.map((sourceId) => sourceProjects[sourceId] ?? "").filter(Boolean));
8431
+ const projectIds = uniqueStrings3(sourceIds.map((sourceId) => sourceProjects[sourceId] ?? "").filter(Boolean));
7698
8432
  return projectIds.length === 1 ? projectIds : [];
7699
8433
  }
7700
8434
  function schemaProjectIdsFromPages(pageIds, pageMap2) {
7701
- return uniqueStrings2(
8435
+ return uniqueStrings3(
7702
8436
  pageIds.flatMap((pageId) => pageMap2.get(pageId)?.projectIds ?? []).filter(Boolean).sort((left, right) => left.localeCompare(right))
7703
8437
  );
7704
8438
  }
@@ -7707,7 +8441,7 @@ function categoryTagsForSchema(schema, texts) {
7707
8441
  if (!haystack) {
7708
8442
  return [];
7709
8443
  }
7710
- return uniqueStrings2(
8444
+ return uniqueStrings3(
7711
8445
  schemaCategoryLabels({ path: "", hash: "", content: schema.content }).filter((label) => haystack.includes(label.toLowerCase())).map((label) => `category/${slugify(label)}`)
7712
8446
  ).slice(0, 3);
7713
8447
  }
@@ -7843,7 +8577,7 @@ function pageHashes(pages) {
7843
8577
  return Object.fromEntries(pages.map((page) => [page.page.id, page.contentHash]));
7844
8578
  }
7845
8579
  async function buildManagedGraphPage(absolutePath, defaults, build) {
7846
- const existingContent = await fileExists(absolutePath) ? await fs14.readFile(absolutePath, "utf8") : null;
8580
+ const existingContent = await fileExists(absolutePath) ? await fs15.readFile(absolutePath, "utf8") : null;
7847
8581
  let existing = await loadExistingManagedPageState(absolutePath, {
7848
8582
  status: defaults.status ?? "active",
7849
8583
  managedBy: defaults.managedBy
@@ -7881,7 +8615,7 @@ async function buildManagedGraphPage(absolutePath, defaults, build) {
7881
8615
  return built;
7882
8616
  }
7883
8617
  async function buildManagedContent(absolutePath, defaults, build) {
7884
- const existingContent = await fileExists(absolutePath) ? await fs14.readFile(absolutePath, "utf8") : null;
8618
+ const existingContent = await fileExists(absolutePath) ? await fs15.readFile(absolutePath, "utf8") : null;
7885
8619
  let existing = await loadExistingManagedPageState(absolutePath, {
7886
8620
  status: defaults.status ?? "active",
7887
8621
  managedBy: defaults.managedBy
@@ -7918,7 +8652,7 @@ async function buildManagedContent(absolutePath, defaults, build) {
7918
8652
  return content;
7919
8653
  }
7920
8654
  function indexCompiledFrom(pages) {
7921
- return uniqueStrings2(pages.flatMap((page) => page.sourceIds));
8655
+ return uniqueStrings3(pages.flatMap((page) => page.sourceIds));
7922
8656
  }
7923
8657
  function deriveGraphMetrics(nodes, edges) {
7924
8658
  const adjacency = /* @__PURE__ */ new Map();
@@ -8321,17 +9055,29 @@ function buildGraph(manifests, analyses, pages, sourceProjects, _codeIndex) {
8321
9055
  pages
8322
9056
  };
8323
9057
  }
8324
- async function buildGraphOrientationPages(graph, paths, schemaHash) {
9058
+ function recentResearchSourcePages(graph, previousCompiledAt) {
9059
+ const previousTimestamp = previousCompiledAt ? Date.parse(previousCompiledAt) : Number.NaN;
9060
+ return graph.pages.filter(
9061
+ (page) => page.kind === "source" && Boolean(page.sourceType) && page.sourceType !== "url"
9062
+ ).filter((page) => Number.isNaN(previousTimestamp) || Date.parse(page.updatedAt) > previousTimestamp).sort((left, right) => right.updatedAt.localeCompare(left.updatedAt) || left.title.localeCompare(right.title)).slice(0, 8).map((page) => ({
9063
+ id: page.id,
9064
+ path: page.path,
9065
+ title: page.title,
9066
+ updatedAt: page.updatedAt,
9067
+ sourceType: page.sourceType
9068
+ }));
9069
+ }
9070
+ async function buildGraphOrientationPages(graph, paths, schemaHash, previousCompiledAt) {
8325
9071
  const benchmark = await readJsonFile(paths.benchmarkPath);
8326
9072
  const communityRecords = [];
8327
9073
  for (const community of graph.communities ?? []) {
8328
- const absolutePath = path17.join(paths.wikiDir, "graph", "communities", `${community.id.replace(/^community:/, "")}.md`);
9074
+ const absolutePath = path18.join(paths.wikiDir, "graph", "communities", `${community.id.replace(/^community:/, "")}.md`);
8329
9075
  communityRecords.push(
8330
9076
  await buildManagedGraphPage(
8331
9077
  absolutePath,
8332
9078
  {
8333
9079
  managedBy: "system",
8334
- compiledFrom: uniqueStrings2(
9080
+ compiledFrom: uniqueStrings3(
8335
9081
  community.nodeIds.flatMap((nodeId) => graph.nodes.find((node) => node.id === nodeId)?.sourceIds ?? [])
8336
9082
  ),
8337
9083
  confidence: 1
@@ -8345,26 +9091,36 @@ async function buildGraphOrientationPages(graph, paths, schemaHash) {
8345
9091
  )
8346
9092
  );
8347
9093
  }
8348
- const reportAbsolutePath = path17.join(paths.wikiDir, "graph", "report.md");
9094
+ const report = buildGraphReportArtifact({
9095
+ graph,
9096
+ communityPages: communityRecords.map((record) => record.page),
9097
+ benchmark,
9098
+ benchmarkStale: benchmark ? benchmark.graphHash !== graphHash(graph) : false,
9099
+ recentResearchSources: recentResearchSourcePages(graph, previousCompiledAt),
9100
+ graphHash: graphHash(graph)
9101
+ });
9102
+ const reportAbsolutePath = path18.join(paths.wikiDir, "graph", "report.md");
8349
9103
  const reportRecord = await buildManagedGraphPage(
8350
9104
  reportAbsolutePath,
8351
9105
  {
8352
9106
  managedBy: "system",
8353
- compiledFrom: uniqueStrings2(graph.pages.flatMap((page) => page.sourceIds)),
9107
+ compiledFrom: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
8354
9108
  confidence: 1
8355
9109
  },
8356
9110
  (metadata) => buildGraphReportPage({
8357
9111
  graph,
8358
9112
  schemaHash,
8359
9113
  metadata,
8360
- communityPages: communityRecords.map((record) => record.page),
8361
- benchmark
9114
+ report
8362
9115
  })
8363
9116
  );
8364
- return [reportRecord, ...communityRecords];
9117
+ return {
9118
+ records: [reportRecord, ...communityRecords],
9119
+ report
9120
+ };
8365
9121
  }
8366
9122
  async function writePage(wikiDir, relativePath, content, changedPages) {
8367
- const absolutePath = path17.resolve(wikiDir, relativePath);
9123
+ const absolutePath = path18.resolve(wikiDir, relativePath);
8368
9124
  const changed = await writeFileIfChanged(absolutePath, content);
8369
9125
  if (changed) {
8370
9126
  changedPages.push(relativePath);
@@ -8426,29 +9182,29 @@ async function requiredCompileArtifactsExist(paths) {
8426
9182
  paths.graphPath,
8427
9183
  paths.codeIndexPath,
8428
9184
  paths.searchDbPath,
8429
- path17.join(paths.wikiDir, "index.md"),
8430
- path17.join(paths.wikiDir, "sources", "index.md"),
8431
- path17.join(paths.wikiDir, "code", "index.md"),
8432
- path17.join(paths.wikiDir, "concepts", "index.md"),
8433
- path17.join(paths.wikiDir, "entities", "index.md"),
8434
- path17.join(paths.wikiDir, "outputs", "index.md"),
8435
- path17.join(paths.wikiDir, "projects", "index.md"),
8436
- path17.join(paths.wikiDir, "candidates", "index.md")
9185
+ path18.join(paths.wikiDir, "index.md"),
9186
+ path18.join(paths.wikiDir, "sources", "index.md"),
9187
+ path18.join(paths.wikiDir, "code", "index.md"),
9188
+ path18.join(paths.wikiDir, "concepts", "index.md"),
9189
+ path18.join(paths.wikiDir, "entities", "index.md"),
9190
+ path18.join(paths.wikiDir, "outputs", "index.md"),
9191
+ path18.join(paths.wikiDir, "projects", "index.md"),
9192
+ path18.join(paths.wikiDir, "candidates", "index.md")
8437
9193
  ];
8438
9194
  const checks = await Promise.all(requiredPaths.map((filePath) => fileExists(filePath)));
8439
9195
  return checks.every(Boolean);
8440
9196
  }
8441
9197
  async function loadAvailableCachedAnalyses(paths, manifests) {
8442
9198
  const analyses = await Promise.all(
8443
- manifests.map(async (manifest) => readJsonFile(path17.join(paths.analysesDir, `${manifest.sourceId}.json`)))
9199
+ manifests.map(async (manifest) => readJsonFile(path18.join(paths.analysesDir, `${manifest.sourceId}.json`)))
8444
9200
  );
8445
9201
  return analyses.filter((analysis) => Boolean(analysis));
8446
9202
  }
8447
9203
  function approvalManifestPath(paths, approvalId) {
8448
- return path17.join(paths.approvalsDir, approvalId, "manifest.json");
9204
+ return path18.join(paths.approvalsDir, approvalId, "manifest.json");
8449
9205
  }
8450
9206
  function approvalGraphPath(paths, approvalId) {
8451
- return path17.join(paths.approvalsDir, approvalId, "state", "graph.json");
9207
+ return path18.join(paths.approvalsDir, approvalId, "state", "graph.json");
8452
9208
  }
8453
9209
  async function readApprovalManifest(paths, approvalId) {
8454
9210
  const manifest = await readJsonFile(approvalManifestPath(paths, approvalId));
@@ -8458,7 +9214,7 @@ async function readApprovalManifest(paths, approvalId) {
8458
9214
  return manifest;
8459
9215
  }
8460
9216
  async function writeApprovalManifest(paths, manifest) {
8461
- await fs14.writeFile(approvalManifestPath(paths, manifest.approvalId), `${JSON.stringify(manifest, null, 2)}
9217
+ await fs15.writeFile(approvalManifestPath(paths, manifest.approvalId), `${JSON.stringify(manifest, null, 2)}
8462
9218
  `, "utf8");
8463
9219
  }
8464
9220
  async function buildApprovalEntries(paths, changedFiles, deletedPaths, previousGraph, graph) {
@@ -8473,7 +9229,7 @@ async function buildApprovalEntries(paths, changedFiles, deletedPaths, previousG
8473
9229
  continue;
8474
9230
  }
8475
9231
  const previousPage = previousPagesById.get(nextPage.id);
8476
- const currentExists = await fileExists(path17.join(paths.wikiDir, file.relativePath));
9232
+ const currentExists = await fileExists(path18.join(paths.wikiDir, file.relativePath));
8477
9233
  if (previousPage && previousPage.path !== nextPage.path) {
8478
9234
  entries.push({
8479
9235
  pageId: nextPage.id,
@@ -8506,7 +9262,7 @@ async function buildApprovalEntries(paths, changedFiles, deletedPaths, previousG
8506
9262
  const previousPage = previousPagesByPath.get(deletedPath);
8507
9263
  entries.push({
8508
9264
  pageId: previousPage?.id ?? `page:${slugify(deletedPath)}`,
8509
- title: previousPage?.title ?? path17.basename(deletedPath, ".md"),
9265
+ title: previousPage?.title ?? path18.basename(deletedPath, ".md"),
8510
9266
  kind: previousPage?.kind ?? "index",
8511
9267
  changeType: "delete",
8512
9268
  status: "pending",
@@ -8518,16 +9274,16 @@ async function buildApprovalEntries(paths, changedFiles, deletedPaths, previousG
8518
9274
  }
8519
9275
  async function stageApprovalBundle(paths, changedFiles, deletedPaths, previousGraph, graph) {
8520
9276
  const approvalId = `compile-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`;
8521
- const approvalDir = path17.join(paths.approvalsDir, approvalId);
9277
+ const approvalDir = path18.join(paths.approvalsDir, approvalId);
8522
9278
  await ensureDir(approvalDir);
8523
- await ensureDir(path17.join(approvalDir, "wiki"));
8524
- await ensureDir(path17.join(approvalDir, "state"));
9279
+ await ensureDir(path18.join(approvalDir, "wiki"));
9280
+ await ensureDir(path18.join(approvalDir, "state"));
8525
9281
  for (const file of changedFiles) {
8526
- const targetPath = path17.join(approvalDir, "wiki", file.relativePath);
8527
- await ensureDir(path17.dirname(targetPath));
8528
- await fs14.writeFile(targetPath, file.content, "utf8");
9282
+ const targetPath = path18.join(approvalDir, "wiki", file.relativePath);
9283
+ await ensureDir(path18.dirname(targetPath));
9284
+ await fs15.writeFile(targetPath, file.content, "utf8");
8529
9285
  }
8530
- await fs14.writeFile(path17.join(approvalDir, "state", "graph.json"), JSON.stringify(graph, null, 2), "utf8");
9286
+ await fs15.writeFile(path18.join(approvalDir, "state", "graph.json"), JSON.stringify(graph, null, 2), "utf8");
8531
9287
  await writeApprovalManifest(paths, {
8532
9288
  approvalId,
8533
9289
  createdAt: (/* @__PURE__ */ new Date()).toISOString(),
@@ -8587,7 +9343,7 @@ async function syncVaultArtifacts(rootDir, input) {
8587
9343
  confidence: 1
8588
9344
  });
8589
9345
  const sourceRecord = await buildManagedGraphPage(
8590
- path17.join(paths.wikiDir, preview.path),
9346
+ path18.join(paths.wikiDir, preview.path),
8591
9347
  {
8592
9348
  managedBy: "system",
8593
9349
  confidence: 1,
@@ -8632,7 +9388,7 @@ async function syncVaultArtifacts(rootDir, input) {
8632
9388
  );
8633
9389
  records.push(
8634
9390
  await buildManagedGraphPage(
8635
- path17.join(paths.wikiDir, modulePreview.path),
9391
+ path18.join(paths.wikiDir, modulePreview.path),
8636
9392
  {
8637
9393
  managedBy: "system",
8638
9394
  confidence: 1,
@@ -8658,15 +9414,15 @@ async function syncVaultArtifacts(rootDir, input) {
8658
9414
  const itemKind = kind === "concepts" ? "concept" : "entity";
8659
9415
  const slug = slugify(aggregate.name);
8660
9416
  const pageId = `${itemKind}:${slug}`;
8661
- const sourceIds = uniqueStrings2(aggregate.sourceAnalyses.map((item) => item.sourceId));
9417
+ const sourceIds = uniqueStrings3(aggregate.sourceAnalyses.map((item) => item.sourceId));
8662
9418
  const projectIds = scopedProjectIdsFromSources(sourceIds, input.sourceProjects);
8663
9419
  const schemaHash = effectiveHashForProject(input.schemas, projectIds[0] ?? null);
8664
9420
  const previousEntry = input.previousState?.candidateHistory?.[pageId];
8665
9421
  const promoted = previousEntry?.status === "active" || promoteCandidates && shouldPromoteCandidate(previousEntry, sourceIds);
8666
9422
  const relativePath = promoted ? activeAggregatePath(itemKind, slug) : candidatePagePathFor(itemKind, slug);
8667
9423
  const fallbackPaths = [
8668
- path17.join(paths.wikiDir, activeAggregatePath(itemKind, slug)),
8669
- path17.join(paths.wikiDir, candidatePagePathFor(itemKind, slug))
9424
+ path18.join(paths.wikiDir, activeAggregatePath(itemKind, slug)),
9425
+ path18.join(paths.wikiDir, candidatePagePathFor(itemKind, slug))
8670
9426
  ];
8671
9427
  const confidence = nodeConfidence(aggregate.sourceAnalyses.length);
8672
9428
  const preview = emptyGraphPage({
@@ -8683,7 +9439,7 @@ async function syncVaultArtifacts(rootDir, input) {
8683
9439
  status: promoted ? "active" : "candidate"
8684
9440
  });
8685
9441
  const pageRecord = await buildManagedGraphPage(
8686
- path17.join(paths.wikiDir, relativePath),
9442
+ path18.join(paths.wikiDir, relativePath),
8687
9443
  {
8688
9444
  status: promoted ? "active" : "candidate",
8689
9445
  managedBy: "system",
@@ -8724,9 +9480,9 @@ async function syncVaultArtifacts(rootDir, input) {
8724
9480
  const compiledPages = records.map((record) => record.page);
8725
9481
  const basePages = [...compiledPages, ...input.outputPages, ...input.insightPages];
8726
9482
  const baseGraph = buildGraph(input.manifests, input.analyses, basePages, input.sourceProjects, input.codeIndex);
8727
- const graphOrientationRecords = await buildGraphOrientationPages(baseGraph, paths, globalSchemaHash);
8728
- records.push(...graphOrientationRecords);
8729
- const allPages = [...basePages, ...graphOrientationRecords.map((record) => record.page)];
9483
+ const graphOrientation = await buildGraphOrientationPages(baseGraph, paths, globalSchemaHash, input.previousState?.generatedAt);
9484
+ records.push(...graphOrientation.records);
9485
+ const allPages = [...basePages, ...graphOrientation.records.map((record) => record.page)];
8730
9486
  const graph = {
8731
9487
  ...baseGraph,
8732
9488
  pages: allPages
@@ -8764,7 +9520,7 @@ async function syncVaultArtifacts(rootDir, input) {
8764
9520
  confidence: 1
8765
9521
  }),
8766
9522
  content: await buildManagedContent(
8767
- path17.join(paths.wikiDir, "projects", "index.md"),
9523
+ path18.join(paths.wikiDir, "projects", "index.md"),
8768
9524
  {
8769
9525
  managedBy: "system",
8770
9526
  compiledFrom: indexCompiledFrom(projectIndexRefs)
@@ -8788,7 +9544,7 @@ async function syncVaultArtifacts(rootDir, input) {
8788
9544
  records.push({
8789
9545
  page: projectIndexRef,
8790
9546
  content: await buildManagedContent(
8791
- path17.join(paths.wikiDir, projectIndexRef.path),
9547
+ path18.join(paths.wikiDir, projectIndexRef.path),
8792
9548
  {
8793
9549
  managedBy: "system",
8794
9550
  compiledFrom: indexCompiledFrom(Object.values(sections).flat())
@@ -8816,7 +9572,7 @@ async function syncVaultArtifacts(rootDir, input) {
8816
9572
  confidence: 1
8817
9573
  }),
8818
9574
  content: await buildManagedContent(
8819
- path17.join(paths.wikiDir, "index.md"),
9575
+ path18.join(paths.wikiDir, "index.md"),
8820
9576
  {
8821
9577
  managedBy: "system",
8822
9578
  compiledFrom: indexCompiledFrom(allPages)
@@ -8847,7 +9603,7 @@ async function syncVaultArtifacts(rootDir, input) {
8847
9603
  confidence: 1
8848
9604
  }),
8849
9605
  content: await buildManagedContent(
8850
- path17.join(paths.wikiDir, relativePath),
9606
+ path18.join(paths.wikiDir, relativePath),
8851
9607
  {
8852
9608
  managedBy: "system",
8853
9609
  compiledFrom: indexCompiledFrom(pages)
@@ -8858,12 +9614,12 @@ async function syncVaultArtifacts(rootDir, input) {
8858
9614
  }
8859
9615
  const nextPagePaths = new Set(records.map((record) => record.page.path));
8860
9616
  const obsoleteGraphPaths = (previousGraph?.pages ?? []).filter((page) => page.kind !== "output" && page.kind !== "insight").map((page) => page.path).filter((relativePath) => !nextPagePaths.has(relativePath));
8861
- const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path17.relative(paths.wikiDir, absolutePath))).filter((relativePath) => !nextPagePaths.has(relativePath));
8862
- const obsoletePaths = uniqueStrings2([...obsoleteGraphPaths, ...existingProjectIndexPaths]);
9617
+ const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath))).filter((relativePath) => !nextPagePaths.has(relativePath));
9618
+ const obsoletePaths = uniqueStrings3([...obsoleteGraphPaths, ...existingProjectIndexPaths]);
8863
9619
  const changedFiles = [];
8864
9620
  for (const record of records) {
8865
- const absolutePath = path17.join(paths.wikiDir, record.page.path);
8866
- const current = await fileExists(absolutePath) ? await fs14.readFile(absolutePath, "utf8") : null;
9621
+ const absolutePath = path18.join(paths.wikiDir, record.page.path);
9622
+ const current = await fileExists(absolutePath) ? await fs15.readFile(absolutePath, "utf8") : null;
8867
9623
  if (current !== record.content) {
8868
9624
  changedPages.push(record.page.path);
8869
9625
  changedFiles.push({ relativePath: record.page.path, content: record.content });
@@ -8888,9 +9644,10 @@ async function syncVaultArtifacts(rootDir, input) {
8888
9644
  await writePage(paths.wikiDir, record.page.path, record.content, writeChanges);
8889
9645
  }
8890
9646
  for (const relativePath of obsoletePaths) {
8891
- await fs14.rm(path17.join(paths.wikiDir, relativePath), { force: true });
9647
+ await fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true });
8892
9648
  }
8893
9649
  await writeJsonFile(paths.graphPath, graph);
9650
+ await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
8894
9651
  await writeJsonFile(paths.codeIndexPath, input.codeIndex);
8895
9652
  await writeJsonFile(paths.compileStatePath, {
8896
9653
  generatedAt: graph.generatedAt,
@@ -8916,7 +9673,7 @@ async function syncVaultArtifacts(rootDir, input) {
8916
9673
  return {
8917
9674
  graph,
8918
9675
  allPages,
8919
- changedPages: uniqueStrings2([...changedPages, ...writeChanges]),
9676
+ changedPages: uniqueStrings3([...changedPages, ...writeChanges]),
8920
9677
  promotedPageIds,
8921
9678
  candidatePageCount: candidatePages.length,
8922
9679
  staged: false
@@ -8925,18 +9682,20 @@ async function syncVaultArtifacts(rootDir, input) {
8925
9682
  async function refreshIndexesAndSearch(rootDir, pages) {
8926
9683
  const { config, paths } = await loadVaultConfig(rootDir);
8927
9684
  const schemas = await loadVaultSchemas(rootDir);
9685
+ const compileState = await readJsonFile(paths.compileStatePath);
8928
9686
  const globalSchemaHash = schemas.effective.global.hash;
8929
9687
  const currentGraph = await readJsonFile(paths.graphPath);
8930
9688
  const basePages = pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary");
8931
- const graphOrientationRecords = currentGraph ? await buildGraphOrientationPages(
9689
+ const graphOrientation = currentGraph ? await buildGraphOrientationPages(
8932
9690
  {
8933
9691
  ...currentGraph,
8934
9692
  pages: basePages
8935
9693
  },
8936
9694
  paths,
8937
- globalSchemaHash
8938
- ) : [];
8939
- const pagesWithGraph = sortGraphPages([...basePages, ...graphOrientationRecords.map((record) => record.page)]);
9695
+ globalSchemaHash,
9696
+ compileState?.generatedAt
9697
+ ) : { records: [], report: null };
9698
+ const pagesWithGraph = sortGraphPages([...basePages, ...graphOrientation.records.map((record) => record.page)]);
8940
9699
  if (currentGraph) {
8941
9700
  await writeJsonFile(paths.graphPath, {
8942
9701
  ...currentGraph,
@@ -8959,17 +9718,17 @@ async function refreshIndexesAndSearch(rootDir, pages) {
8959
9718
  })
8960
9719
  );
8961
9720
  await Promise.all([
8962
- ensureDir(path17.join(paths.wikiDir, "sources")),
8963
- ensureDir(path17.join(paths.wikiDir, "code")),
8964
- ensureDir(path17.join(paths.wikiDir, "concepts")),
8965
- ensureDir(path17.join(paths.wikiDir, "entities")),
8966
- ensureDir(path17.join(paths.wikiDir, "outputs")),
8967
- ensureDir(path17.join(paths.wikiDir, "graph")),
8968
- ensureDir(path17.join(paths.wikiDir, "graph", "communities")),
8969
- ensureDir(path17.join(paths.wikiDir, "projects")),
8970
- ensureDir(path17.join(paths.wikiDir, "candidates"))
9721
+ ensureDir(path18.join(paths.wikiDir, "sources")),
9722
+ ensureDir(path18.join(paths.wikiDir, "code")),
9723
+ ensureDir(path18.join(paths.wikiDir, "concepts")),
9724
+ ensureDir(path18.join(paths.wikiDir, "entities")),
9725
+ ensureDir(path18.join(paths.wikiDir, "outputs")),
9726
+ ensureDir(path18.join(paths.wikiDir, "graph")),
9727
+ ensureDir(path18.join(paths.wikiDir, "graph", "communities")),
9728
+ ensureDir(path18.join(paths.wikiDir, "projects")),
9729
+ ensureDir(path18.join(paths.wikiDir, "candidates"))
8971
9730
  ]);
8972
- const projectsIndexPath = path17.join(paths.wikiDir, "projects", "index.md");
9731
+ const projectsIndexPath = path18.join(paths.wikiDir, "projects", "index.md");
8973
9732
  await writeFileIfChanged(
8974
9733
  projectsIndexPath,
8975
9734
  await buildManagedContent(
@@ -8990,7 +9749,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
8990
9749
  outputs: pages.filter((page) => page.kind === "output" && page.projectIds.includes(project.id)),
8991
9750
  candidates: pages.filter((page) => page.status === "candidate" && page.projectIds.includes(project.id))
8992
9751
  };
8993
- const absolutePath = path17.join(paths.wikiDir, "projects", project.id, "index.md");
9752
+ const absolutePath = path18.join(paths.wikiDir, "projects", project.id, "index.md");
8994
9753
  await writeFileIfChanged(
8995
9754
  absolutePath,
8996
9755
  await buildManagedContent(
@@ -9008,7 +9767,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
9008
9767
  )
9009
9768
  );
9010
9769
  }
9011
- const rootIndexPath = path17.join(paths.wikiDir, "index.md");
9770
+ const rootIndexPath = path18.join(paths.wikiDir, "index.md");
9012
9771
  await writeFileIfChanged(
9013
9772
  rootIndexPath,
9014
9773
  await buildManagedContent(
@@ -9029,7 +9788,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
9029
9788
  ["candidates/index.md", "candidates", pagesWithGraph.filter((page) => page.status === "candidate")],
9030
9789
  ["graph/index.md", "graph", pagesWithGraph.filter((page) => page.kind === "graph_report" || page.kind === "community_summary")]
9031
9790
  ]) {
9032
- const absolutePath = path17.join(paths.wikiDir, relativePath);
9791
+ const absolutePath = path18.join(paths.wikiDir, relativePath);
9033
9792
  await writeFileIfChanged(
9034
9793
  absolutePath,
9035
9794
  await buildManagedContent(
@@ -9042,21 +9801,24 @@ async function refreshIndexesAndSearch(rootDir, pages) {
9042
9801
  )
9043
9802
  );
9044
9803
  }
9045
- for (const record of graphOrientationRecords) {
9046
- await writeFileIfChanged(path17.join(paths.wikiDir, record.page.path), record.content);
9804
+ for (const record of graphOrientation.records) {
9805
+ await writeFileIfChanged(path18.join(paths.wikiDir, record.page.path), record.content);
9047
9806
  }
9048
- const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path17.relative(paths.wikiDir, absolutePath)));
9807
+ if (graphOrientation.report) {
9808
+ await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
9809
+ }
9810
+ const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
9049
9811
  const allowedProjectIndexPaths = /* @__PURE__ */ new Set([
9050
9812
  "projects/index.md",
9051
9813
  ...configuredProjects.map((project) => `projects/${project.id}/index.md`)
9052
9814
  ]);
9053
9815
  await Promise.all(
9054
- existingProjectIndexPaths.filter((relativePath) => !allowedProjectIndexPaths.has(relativePath)).map((relativePath) => fs14.rm(path17.join(paths.wikiDir, relativePath), { force: true }))
9816
+ existingProjectIndexPaths.filter((relativePath) => !allowedProjectIndexPaths.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
9055
9817
  );
9056
- const existingGraphPages = (await listFilesRecursive(path17.join(paths.wikiDir, "graph").replace(/\/$/, "")).catch(() => [])).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path17.relative(paths.wikiDir, absolutePath)));
9057
- const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...graphOrientationRecords.map((record) => record.page.path)]);
9818
+ const existingGraphPages = (await listFilesRecursive(path18.join(paths.wikiDir, "graph").replace(/\/$/, "")).catch(() => [])).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
9819
+ const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...graphOrientation.records.map((record) => record.page.path)]);
9058
9820
  await Promise.all(
9059
- existingGraphPages.filter((relativePath) => !allowedGraphPages.has(relativePath)).map((relativePath) => fs14.rm(path17.join(paths.wikiDir, relativePath), { force: true }))
9821
+ existingGraphPages.filter((relativePath) => !allowedGraphPages.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
9060
9822
  );
9061
9823
  await rebuildSearchIndex(paths.searchDbPath, pagesWithGraph, paths.wikiDir);
9062
9824
  }
@@ -9071,12 +9833,12 @@ async function prepareOutputPageSave(rootDir, input) {
9071
9833
  status: "active",
9072
9834
  createdAt: now,
9073
9835
  updatedAt: now,
9074
- compiledFrom: uniqueStrings2(input.relatedSourceIds ?? input.citations),
9836
+ compiledFrom: uniqueStrings3(input.relatedSourceIds ?? input.citations),
9075
9837
  managedBy: "system",
9076
9838
  confidence: 0.74
9077
9839
  }
9078
9840
  });
9079
- const absolutePath = path17.join(paths.wikiDir, output.page.path);
9841
+ const absolutePath = path18.join(paths.wikiDir, output.page.path);
9080
9842
  return {
9081
9843
  page: output.page,
9082
9844
  savedPath: absolutePath,
@@ -9088,15 +9850,15 @@ async function prepareOutputPageSave(rootDir, input) {
9088
9850
  async function persistOutputPage(rootDir, input) {
9089
9851
  const { paths } = await loadVaultConfig(rootDir);
9090
9852
  const prepared = await prepareOutputPageSave(rootDir, input);
9091
- await ensureDir(path17.dirname(prepared.savedPath));
9092
- await fs14.writeFile(prepared.savedPath, prepared.content, "utf8");
9853
+ await ensureDir(path18.dirname(prepared.savedPath));
9854
+ await fs15.writeFile(prepared.savedPath, prepared.content, "utf8");
9093
9855
  for (const assetFile of prepared.assetFiles) {
9094
- const assetPath = path17.join(paths.wikiDir, assetFile.relativePath);
9095
- await ensureDir(path17.dirname(assetPath));
9856
+ const assetPath = path18.join(paths.wikiDir, assetFile.relativePath);
9857
+ await ensureDir(path18.dirname(assetPath));
9096
9858
  if (typeof assetFile.content === "string") {
9097
- await fs14.writeFile(assetPath, assetFile.content, assetFile.encoding ?? "utf8");
9859
+ await fs15.writeFile(assetPath, assetFile.content, assetFile.encoding ?? "utf8");
9098
9860
  } else {
9099
- await fs14.writeFile(assetPath, assetFile.content);
9861
+ await fs15.writeFile(assetPath, assetFile.content);
9100
9862
  }
9101
9863
  }
9102
9864
  return { page: prepared.page, savedPath: prepared.savedPath, outputAssets: prepared.outputAssets };
@@ -9112,12 +9874,12 @@ async function prepareExploreHubSave(rootDir, input) {
9112
9874
  status: "active",
9113
9875
  createdAt: now,
9114
9876
  updatedAt: now,
9115
- compiledFrom: uniqueStrings2(input.citations),
9877
+ compiledFrom: uniqueStrings3(input.citations),
9116
9878
  managedBy: "system",
9117
9879
  confidence: 0.76
9118
9880
  }
9119
9881
  });
9120
- const absolutePath = path17.join(paths.wikiDir, hub.page.path);
9882
+ const absolutePath = path18.join(paths.wikiDir, hub.page.path);
9121
9883
  return {
9122
9884
  page: hub.page,
9123
9885
  savedPath: absolutePath,
@@ -9129,15 +9891,15 @@ async function prepareExploreHubSave(rootDir, input) {
9129
9891
  async function persistExploreHub(rootDir, input) {
9130
9892
  const { paths } = await loadVaultConfig(rootDir);
9131
9893
  const prepared = await prepareExploreHubSave(rootDir, input);
9132
- await ensureDir(path17.dirname(prepared.savedPath));
9133
- await fs14.writeFile(prepared.savedPath, prepared.content, "utf8");
9894
+ await ensureDir(path18.dirname(prepared.savedPath));
9895
+ await fs15.writeFile(prepared.savedPath, prepared.content, "utf8");
9134
9896
  for (const assetFile of prepared.assetFiles) {
9135
- const assetPath = path17.join(paths.wikiDir, assetFile.relativePath);
9136
- await ensureDir(path17.dirname(assetPath));
9897
+ const assetPath = path18.join(paths.wikiDir, assetFile.relativePath);
9898
+ await ensureDir(path18.dirname(assetPath));
9137
9899
  if (typeof assetFile.content === "string") {
9138
- await fs14.writeFile(assetPath, assetFile.content, assetFile.encoding ?? "utf8");
9900
+ await fs15.writeFile(assetPath, assetFile.content, assetFile.encoding ?? "utf8");
9139
9901
  } else {
9140
- await fs14.writeFile(assetPath, assetFile.content);
9902
+ await fs15.writeFile(assetPath, assetFile.content);
9141
9903
  }
9142
9904
  }
9143
9905
  return { page: prepared.page, savedPath: prepared.savedPath, outputAssets: prepared.outputAssets };
@@ -9154,17 +9916,17 @@ async function stageOutputApprovalBundle(rootDir, stagedPages) {
9154
9916
  }))
9155
9917
  ]);
9156
9918
  const approvalId = `schedule-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`;
9157
- const approvalDir = path17.join(paths.approvalsDir, approvalId);
9919
+ const approvalDir = path18.join(paths.approvalsDir, approvalId);
9158
9920
  await ensureDir(approvalDir);
9159
- await ensureDir(path17.join(approvalDir, "wiki"));
9160
- await ensureDir(path17.join(approvalDir, "state"));
9921
+ await ensureDir(path18.join(approvalDir, "wiki"));
9922
+ await ensureDir(path18.join(approvalDir, "state"));
9161
9923
  for (const file of changedFiles) {
9162
- const targetPath = path17.join(approvalDir, "wiki", file.relativePath);
9163
- await ensureDir(path17.dirname(targetPath));
9924
+ const targetPath = path18.join(approvalDir, "wiki", file.relativePath);
9925
+ await ensureDir(path18.dirname(targetPath));
9164
9926
  if ("binary" in file && file.binary) {
9165
- await fs14.writeFile(targetPath, Buffer.from(file.content, "base64"));
9927
+ await fs15.writeFile(targetPath, Buffer.from(file.content, "base64"));
9166
9928
  } else {
9167
- await fs14.writeFile(targetPath, file.content, "utf8");
9929
+ await fs15.writeFile(targetPath, file.content, "utf8");
9168
9930
  }
9169
9931
  }
9170
9932
  const nextPages = sortGraphPages([
@@ -9178,7 +9940,7 @@ async function stageOutputApprovalBundle(rootDir, stagedPages) {
9178
9940
  sources: previousGraph?.sources ?? [],
9179
9941
  pages: nextPages
9180
9942
  };
9181
- await fs14.writeFile(path17.join(approvalDir, "state", "graph.json"), JSON.stringify(graph, null, 2), "utf8");
9943
+ await fs15.writeFile(path18.join(approvalDir, "state", "graph.json"), JSON.stringify(graph, null, 2), "utf8");
9182
9944
  await writeApprovalManifest(paths, {
9183
9945
  approvalId,
9184
9946
  createdAt: (/* @__PURE__ */ new Date()).toISOString(),
@@ -9207,10 +9969,10 @@ async function executeQuery(rootDir, question, format) {
9207
9969
  const searchResults = searchPages(paths.searchDbPath, question, 5);
9208
9970
  const excerpts = await Promise.all(
9209
9971
  searchResults.map(async (result) => {
9210
- const absolutePath = path17.join(paths.wikiDir, result.path);
9972
+ const absolutePath = path18.join(paths.wikiDir, result.path);
9211
9973
  try {
9212
- const content = await fs14.readFile(absolutePath, "utf8");
9213
- const parsed = matter8(content);
9974
+ const content = await fs15.readFile(absolutePath, "utf8");
9975
+ const parsed = matter9(content);
9214
9976
  return `# ${result.title}
9215
9977
  ${truncate(normalizeWhitespace(parsed.content), 1200)}`;
9216
9978
  } catch {
@@ -9307,8 +10069,8 @@ async function generateFollowUpQuestions(rootDir, question, answer) {
9307
10069
  Current answer:
9308
10070
  ${answer}`
9309
10071
  },
9310
- z6.object({
9311
- questions: z6.array(z6.string().min(1)).max(5)
10072
+ z7.object({
10073
+ questions: z7.array(z7.string().min(1)).max(5)
9312
10074
  })
9313
10075
  );
9314
10076
  return uniqueBy(response.questions, (item) => item).filter((item) => item !== question);
@@ -9391,7 +10153,7 @@ function sortGraphPages(pages) {
9391
10153
  async function listApprovals(rootDir) {
9392
10154
  const { paths } = await loadVaultConfig(rootDir);
9393
10155
  const manifests = await Promise.all(
9394
- (await fs14.readdir(paths.approvalsDir, { withFileTypes: true }).catch(() => [])).filter((entry) => entry.isDirectory()).map(async (entry) => {
10156
+ (await fs15.readdir(paths.approvalsDir, { withFileTypes: true }).catch(() => [])).filter((entry) => entry.isDirectory()).map(async (entry) => {
9395
10157
  try {
9396
10158
  return await readApprovalManifest(paths, entry.name);
9397
10159
  } catch {
@@ -9407,8 +10169,8 @@ async function readApproval(rootDir, approvalId) {
9407
10169
  const details = await Promise.all(
9408
10170
  manifest.entries.map(async (entry) => {
9409
10171
  const currentPath = entry.previousPath ?? entry.nextPath;
9410
- const currentContent = currentPath ? await fs14.readFile(path17.join(paths.wikiDir, currentPath), "utf8").catch(() => void 0) : void 0;
9411
- const stagedContent = entry.nextPath ? await fs14.readFile(path17.join(paths.approvalsDir, approvalId, "wiki", entry.nextPath), "utf8").catch(() => void 0) : void 0;
10172
+ const currentContent = currentPath ? await fs15.readFile(path18.join(paths.wikiDir, currentPath), "utf8").catch(() => void 0) : void 0;
10173
+ const stagedContent = entry.nextPath ? await fs15.readFile(path18.join(paths.approvalsDir, approvalId, "wiki", entry.nextPath), "utf8").catch(() => void 0) : void 0;
9412
10174
  return {
9413
10175
  ...entry,
9414
10176
  currentContent,
@@ -9436,26 +10198,26 @@ async function acceptApproval(rootDir, approvalId, targets = []) {
9436
10198
  if (!entry.nextPath) {
9437
10199
  throw new Error(`Approval entry ${entry.pageId} is missing a staged path.`);
9438
10200
  }
9439
- const stagedAbsolutePath = path17.join(paths.approvalsDir, approvalId, "wiki", entry.nextPath);
9440
- const stagedContent = await fs14.readFile(stagedAbsolutePath, "utf8");
9441
- const targetAbsolutePath = path17.join(paths.wikiDir, entry.nextPath);
9442
- await ensureDir(path17.dirname(targetAbsolutePath));
9443
- await fs14.writeFile(targetAbsolutePath, stagedContent, "utf8");
10201
+ const stagedAbsolutePath = path18.join(paths.approvalsDir, approvalId, "wiki", entry.nextPath);
10202
+ const stagedContent = await fs15.readFile(stagedAbsolutePath, "utf8");
10203
+ const targetAbsolutePath = path18.join(paths.wikiDir, entry.nextPath);
10204
+ await ensureDir(path18.dirname(targetAbsolutePath));
10205
+ await fs15.writeFile(targetAbsolutePath, stagedContent, "utf8");
9444
10206
  if (entry.changeType === "promote" && entry.previousPath) {
9445
- await fs14.rm(path17.join(paths.wikiDir, entry.previousPath), { force: true });
10207
+ await fs15.rm(path18.join(paths.wikiDir, entry.previousPath), { force: true });
9446
10208
  }
9447
10209
  const nextPage = bundleGraph?.pages.find((page) => page.id === entry.pageId && page.path === entry.nextPath) ?? parseStoredPage(entry.nextPath, stagedContent);
9448
10210
  if (nextPage.kind === "output" && nextPage.outputAssets?.length) {
9449
- const outputAssetDir = path17.join(paths.wikiDir, "outputs", "assets", path17.basename(nextPage.path, ".md"));
9450
- await fs14.rm(outputAssetDir, { recursive: true, force: true });
10211
+ const outputAssetDir = path18.join(paths.wikiDir, "outputs", "assets", path18.basename(nextPage.path, ".md"));
10212
+ await fs15.rm(outputAssetDir, { recursive: true, force: true });
9451
10213
  for (const asset of nextPage.outputAssets) {
9452
- const stagedAssetPath = path17.join(paths.approvalsDir, approvalId, "wiki", asset.path);
10214
+ const stagedAssetPath = path18.join(paths.approvalsDir, approvalId, "wiki", asset.path);
9453
10215
  if (!await fileExists(stagedAssetPath)) {
9454
10216
  continue;
9455
10217
  }
9456
- const targetAssetPath = path17.join(paths.wikiDir, asset.path);
9457
- await ensureDir(path17.dirname(targetAssetPath));
9458
- await fs14.copyFile(stagedAssetPath, targetAssetPath);
10218
+ const targetAssetPath = path18.join(paths.wikiDir, asset.path);
10219
+ await ensureDir(path18.dirname(targetAssetPath));
10220
+ await fs15.copyFile(stagedAssetPath, targetAssetPath);
9459
10221
  }
9460
10222
  }
9461
10223
  nextPages = nextPages.filter(
@@ -9466,10 +10228,10 @@ async function acceptApproval(rootDir, approvalId, targets = []) {
9466
10228
  } else {
9467
10229
  const deletedPage = nextPages.find((page) => page.id === entry.pageId || page.path === entry.previousPath) ?? bundleGraph?.pages.find((page) => page.id === entry.pageId || page.path === entry.previousPath) ?? null;
9468
10230
  if (entry.previousPath) {
9469
- await fs14.rm(path17.join(paths.wikiDir, entry.previousPath), { force: true });
10231
+ await fs15.rm(path18.join(paths.wikiDir, entry.previousPath), { force: true });
9470
10232
  }
9471
10233
  if (deletedPage?.kind === "output") {
9472
- await fs14.rm(path17.join(paths.wikiDir, "outputs", "assets", path17.basename(deletedPage.path, ".md")), {
10234
+ await fs15.rm(path18.join(paths.wikiDir, "outputs", "assets", path18.basename(deletedPage.path, ".md")), {
9473
10235
  recursive: true,
9474
10236
  force: true
9475
10237
  });
@@ -9559,22 +10321,22 @@ async function promoteCandidate(rootDir, target) {
9559
10321
  const { paths } = await loadVaultConfig(rootDir);
9560
10322
  const graph = await readJsonFile(paths.graphPath);
9561
10323
  const candidate = resolveCandidateTarget(graph?.pages ?? [], target);
9562
- const raw = await fs14.readFile(path17.join(paths.wikiDir, candidate.path), "utf8");
9563
- const parsed = matter8(raw);
10324
+ const raw = await fs15.readFile(path18.join(paths.wikiDir, candidate.path), "utf8");
10325
+ const parsed = matter9(raw);
9564
10326
  const nextUpdatedAt = (/* @__PURE__ */ new Date()).toISOString();
9565
- const nextContent = matter8.stringify(parsed.content, {
10327
+ const nextContent = matter9.stringify(parsed.content, {
9566
10328
  ...parsed.data,
9567
10329
  status: "active",
9568
10330
  updated_at: nextUpdatedAt,
9569
- tags: uniqueStrings2([candidate.kind, ...Array.isArray(parsed.data.tags) ? parsed.data.tags : []]).filter(
10331
+ tags: uniqueStrings3([candidate.kind, ...Array.isArray(parsed.data.tags) ? parsed.data.tags : []]).filter(
9570
10332
  (tag) => tag !== "candidate"
9571
10333
  )
9572
10334
  });
9573
10335
  const nextPath = candidateActivePath(candidate);
9574
- const nextAbsolutePath = path17.join(paths.wikiDir, nextPath);
9575
- await ensureDir(path17.dirname(nextAbsolutePath));
9576
- await fs14.writeFile(nextAbsolutePath, nextContent, "utf8");
9577
- await fs14.rm(path17.join(paths.wikiDir, candidate.path), { force: true });
10336
+ const nextAbsolutePath = path18.join(paths.wikiDir, nextPath);
10337
+ await ensureDir(path18.dirname(nextAbsolutePath));
10338
+ await fs15.writeFile(nextAbsolutePath, nextContent, "utf8");
10339
+ await fs15.rm(path18.join(paths.wikiDir, candidate.path), { force: true });
9578
10340
  const nextPage = parseStoredPage(nextPath, nextContent, { createdAt: candidate.createdAt, updatedAt: nextUpdatedAt });
9579
10341
  const nextPages = sortGraphPages(
9580
10342
  (graph?.pages ?? []).filter((page) => page.id !== candidate.id && page.path !== candidate.path).concat(nextPage)
@@ -9618,7 +10380,7 @@ async function archiveCandidate(rootDir, target) {
9618
10380
  const { paths } = await loadVaultConfig(rootDir);
9619
10381
  const graph = await readJsonFile(paths.graphPath);
9620
10382
  const candidate = resolveCandidateTarget(graph?.pages ?? [], target);
9621
- await fs14.rm(path17.join(paths.wikiDir, candidate.path), { force: true });
10383
+ await fs15.rm(path18.join(paths.wikiDir, candidate.path), { force: true });
9622
10384
  const nextPages = sortGraphPages((graph?.pages ?? []).filter((page) => page.id !== candidate.id && page.path !== candidate.path));
9623
10385
  const nextGraph = {
9624
10386
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
@@ -9656,18 +10418,18 @@ async function archiveCandidate(rootDir, target) {
9656
10418
  }
9657
10419
  async function ensureObsidianWorkspace(rootDir) {
9658
10420
  const { config } = await loadVaultConfig(rootDir);
9659
- const obsidianDir = path17.join(rootDir, ".obsidian");
10421
+ const obsidianDir = path18.join(rootDir, ".obsidian");
9660
10422
  const projectIds = projectEntries(config).map((project) => project.id);
9661
10423
  await ensureDir(obsidianDir);
9662
10424
  await Promise.all([
9663
- writeJsonFile(path17.join(obsidianDir, "app.json"), {
10425
+ writeJsonFile(path18.join(obsidianDir, "app.json"), {
9664
10426
  alwaysUpdateLinks: true,
9665
10427
  newFileLocation: "folder",
9666
10428
  newFileFolderPath: "wiki/insights",
9667
10429
  useMarkdownLinks: false,
9668
10430
  attachmentFolderPath: "raw/assets"
9669
10431
  }),
9670
- writeJsonFile(path17.join(obsidianDir, "core-plugins.json"), [
10432
+ writeJsonFile(path18.join(obsidianDir, "core-plugins.json"), [
9671
10433
  "file-explorer",
9672
10434
  "global-search",
9673
10435
  "switcher",
@@ -9677,7 +10439,7 @@ async function ensureObsidianWorkspace(rootDir) {
9677
10439
  "tag-pane",
9678
10440
  "page-preview"
9679
10441
  ]),
9680
- writeJsonFile(path17.join(obsidianDir, "graph.json"), {
10442
+ writeJsonFile(path18.join(obsidianDir, "graph.json"), {
9681
10443
  "collapse-filter": false,
9682
10444
  search: "",
9683
10445
  showTags: true,
@@ -9689,7 +10451,7 @@ async function ensureObsidianWorkspace(rootDir) {
9689
10451
  })),
9690
10452
  localJumps: false
9691
10453
  }),
9692
- writeJsonFile(path17.join(obsidianDir, "workspace.json"), {
10454
+ writeJsonFile(path18.join(obsidianDir, "workspace.json"), {
9693
10455
  active: "root",
9694
10456
  lastOpenFiles: ["wiki/index.md", "wiki/projects/index.md", "wiki/candidates/index.md", "wiki/insights/index.md"],
9695
10457
  left: {
@@ -9704,11 +10466,11 @@ async function ensureObsidianWorkspace(rootDir) {
9704
10466
  async function initVault(rootDir, options = {}) {
9705
10467
  const { paths } = await initWorkspace(rootDir);
9706
10468
  await installConfiguredAgents(rootDir);
9707
- const insightsIndexPath = path17.join(paths.wikiDir, "insights", "index.md");
10469
+ const insightsIndexPath = path18.join(paths.wikiDir, "insights", "index.md");
9708
10470
  const now = (/* @__PURE__ */ new Date()).toISOString();
9709
10471
  await writeFileIfChanged(
9710
10472
  insightsIndexPath,
9711
- matter8.stringify(
10473
+ matter9.stringify(
9712
10474
  [
9713
10475
  "# Insights",
9714
10476
  "",
@@ -9740,8 +10502,8 @@ async function initVault(rootDir, options = {}) {
9740
10502
  )
9741
10503
  );
9742
10504
  await writeFileIfChanged(
9743
- path17.join(paths.wikiDir, "projects", "index.md"),
9744
- matter8.stringify(["# Projects", "", "- Run `swarmvault compile` to build project rollups.", ""].join("\n"), {
10505
+ path18.join(paths.wikiDir, "projects", "index.md"),
10506
+ matter9.stringify(["# Projects", "", "- Run `swarmvault compile` to build project rollups.", ""].join("\n"), {
9745
10507
  page_id: "projects:index",
9746
10508
  kind: "index",
9747
10509
  title: "Projects",
@@ -9762,8 +10524,8 @@ async function initVault(rootDir, options = {}) {
9762
10524
  })
9763
10525
  );
9764
10526
  await writeFileIfChanged(
9765
- path17.join(paths.wikiDir, "candidates", "index.md"),
9766
- matter8.stringify(["# Candidates", "", "- Run `swarmvault compile` to stage candidate pages.", ""].join("\n"), {
10527
+ path18.join(paths.wikiDir, "candidates", "index.md"),
10528
+ matter9.stringify(["# Candidates", "", "- Run `swarmvault compile` to stage candidate pages.", ""].join("\n"), {
9767
10529
  page_id: "candidates:index",
9768
10530
  kind: "index",
9769
10531
  title: "Candidates",
@@ -9787,6 +10549,20 @@ async function initVault(rootDir, options = {}) {
9787
10549
  await ensureObsidianWorkspace(rootDir);
9788
10550
  }
9789
10551
  }
10552
+ async function runConfiguredBenchmark(rootDir, config) {
10553
+ if (config.benchmark?.enabled === false) {
10554
+ return { ok: true };
10555
+ }
10556
+ try {
10557
+ await benchmarkVault(rootDir);
10558
+ return { ok: true };
10559
+ } catch (error) {
10560
+ return {
10561
+ ok: false,
10562
+ error: error instanceof Error ? error.message : String(error)
10563
+ };
10564
+ }
10565
+ }
9790
10566
  async function compileVault(rootDir, options = {}) {
9791
10567
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
9792
10568
  const { config, paths } = await initWorkspace(rootDir);
@@ -9802,7 +10578,7 @@ async function compileVault(rootDir, options = {}) {
9802
10578
  const currentInsightHashes = pageHashes(storedInsightPages);
9803
10579
  const previousState = await readJsonFile(paths.compileStatePath);
9804
10580
  const rootSchemaChanged = !previousState || previousState.rootSchemaHash !== schemas.root.hash;
9805
- const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash || uniqueStrings2([...Object.keys(previousState?.effectiveSchemaHashes?.projects ?? {}), ...Object.keys(schemas.effective.projects)]).some(
10581
+ const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash || uniqueStrings3([...Object.keys(previousState?.effectiveSchemaHashes?.projects ?? {}), ...Object.keys(schemas.effective.projects)]).some(
9806
10582
  (projectId) => previousProjectSchemaHash(previousState, projectId) !== effectiveHashForProject(schemas, projectId)
9807
10583
  );
9808
10584
  const nextProjectConfigHash = projectConfigHash(config);
@@ -9835,6 +10611,10 @@ async function compileVault(rootDir, options = {}) {
9835
10611
  }
9836
10612
  if (dirty.length === 0 && !rootSchemaChanged && !effectiveSchemaChanged && !projectConfigChanged && !sourcesChanged && !outputsChanged && !insightsChanged && !pendingCandidatePromotion && artifactsExist && !options.approve) {
9837
10613
  const graph = await readJsonFile(paths.graphPath);
10614
+ const benchmark2 = await runConfiguredBenchmark(rootDir, config);
10615
+ if (graph && benchmark2.ok) {
10616
+ await refreshIndexesAndSearch(rootDir, graph.pages);
10617
+ }
9838
10618
  await recordSession(rootDir, {
9839
10619
  operation: "compile",
9840
10620
  title: `Compiled ${manifests.length} source(s)`,
@@ -9852,7 +10632,8 @@ async function compileVault(rootDir, options = {}) {
9852
10632
  `clean=${manifests.length}`,
9853
10633
  `outputs=${outputPages.length}`,
9854
10634
  `insights=${insightPages.length}`,
9855
- `schema=${schemas.effective.global.hash.slice(0, 12)}`
10635
+ `schema=${schemas.effective.global.hash.slice(0, 12)}`,
10636
+ `benchmark=${benchmark2.ok ? "ok" : `error:${benchmark2.error}`}`
9856
10637
  ]
9857
10638
  });
9858
10639
  return {
@@ -9879,7 +10660,7 @@ async function compileVault(rootDir, options = {}) {
9879
10660
  ),
9880
10661
  Promise.all(
9881
10662
  clean.map(async (manifest) => {
9882
- const cached = await readJsonFile(path17.join(paths.analysesDir, `${manifest.sourceId}.json`));
10663
+ const cached = await readJsonFile(path18.join(paths.analysesDir, `${manifest.sourceId}.json`));
9883
10664
  if (cached) {
9884
10665
  return cached;
9885
10666
  }
@@ -9903,22 +10684,22 @@ async function compileVault(rootDir, options = {}) {
9903
10684
  }
9904
10685
  const enriched = enrichResolvedCodeImports(manifest, analysis, codeIndex);
9905
10686
  if (analysisSignature(enriched) !== analysisSignature(analysis)) {
9906
- await writeJsonFile(path17.join(paths.analysesDir, `${analysis.sourceId}.json`), enriched);
10687
+ await writeJsonFile(path18.join(paths.analysesDir, `${analysis.sourceId}.json`), enriched);
9907
10688
  }
9908
10689
  return enriched;
9909
10690
  })
9910
10691
  );
9911
10692
  await Promise.all([
9912
- ensureDir(path17.join(paths.wikiDir, "sources")),
9913
- ensureDir(path17.join(paths.wikiDir, "code")),
9914
- ensureDir(path17.join(paths.wikiDir, "concepts")),
9915
- ensureDir(path17.join(paths.wikiDir, "entities")),
9916
- ensureDir(path17.join(paths.wikiDir, "outputs")),
9917
- ensureDir(path17.join(paths.wikiDir, "projects")),
9918
- ensureDir(path17.join(paths.wikiDir, "insights")),
9919
- ensureDir(path17.join(paths.wikiDir, "candidates")),
9920
- ensureDir(path17.join(paths.wikiDir, "candidates", "concepts")),
9921
- ensureDir(path17.join(paths.wikiDir, "candidates", "entities"))
10693
+ ensureDir(path18.join(paths.wikiDir, "sources")),
10694
+ ensureDir(path18.join(paths.wikiDir, "code")),
10695
+ ensureDir(path18.join(paths.wikiDir, "concepts")),
10696
+ ensureDir(path18.join(paths.wikiDir, "entities")),
10697
+ ensureDir(path18.join(paths.wikiDir, "outputs")),
10698
+ ensureDir(path18.join(paths.wikiDir, "projects")),
10699
+ ensureDir(path18.join(paths.wikiDir, "insights")),
10700
+ ensureDir(path18.join(paths.wikiDir, "candidates")),
10701
+ ensureDir(path18.join(paths.wikiDir, "candidates", "concepts")),
10702
+ ensureDir(path18.join(paths.wikiDir, "candidates", "entities"))
9922
10703
  ]);
9923
10704
  const sync = await syncVaultArtifacts(rootDir, {
9924
10705
  schemas,
@@ -9970,6 +10751,10 @@ async function compileVault(rootDir, options = {}) {
9970
10751
  postPassApprovalDir = staged.approvalDir;
9971
10752
  }
9972
10753
  }
10754
+ const benchmark = options.approve ? { ok: true } : await runConfiguredBenchmark(rootDir, config);
10755
+ if (!options.approve && benchmark.ok) {
10756
+ await refreshIndexesAndSearch(rootDir, sync.allPages);
10757
+ }
9973
10758
  await recordSession(rootDir, {
9974
10759
  operation: "compile",
9975
10760
  title: `Compiled ${manifests.length} source(s)`,
@@ -9991,7 +10776,8 @@ async function compileVault(rootDir, options = {}) {
9991
10776
  `promoted=${sync.promotedPageIds.length}`,
9992
10777
  `staged=${sync.staged}`,
9993
10778
  `postPassApproval=${postPassApprovalId ?? "none"}`,
9994
- `schema=${schemas.effective.global.hash.slice(0, 12)}`
10779
+ `schema=${schemas.effective.global.hash.slice(0, 12)}`,
10780
+ `benchmark=${benchmark.ok ? "ok" : `error:${benchmark.error}`}`
9995
10781
  ]
9996
10782
  });
9997
10783
  return {
@@ -10060,7 +10846,7 @@ async function queryVault(rootDir, options) {
10060
10846
  assetFiles: staged.assetFiles
10061
10847
  }
10062
10848
  ]);
10063
- stagedPath = path17.join(approval.approvalDir, "wiki", staged.page.path);
10849
+ stagedPath = path18.join(approval.approvalDir, "wiki", staged.page.path);
10064
10850
  savedPageId = staged.page.id;
10065
10851
  approvalId = approval.approvalId;
10066
10852
  approvalDir = approval.approvalDir;
@@ -10261,7 +11047,7 @@ ${orchestrationNotes.join("\n")}
10261
11047
  citations: allCitations,
10262
11048
  format: outputFormat,
10263
11049
  relatedPageCount: stepPages.length,
10264
- relatedNodeCount: uniqueStrings2(stepPages.flatMap((page) => page.nodeIds)).length,
11050
+ relatedNodeCount: uniqueStrings3(stepPages.flatMap((page) => page.nodeIds)).length,
10265
11051
  projectId: stepPages[0]?.projectIds[0] ?? null
10266
11052
  });
10267
11053
  const hubInput = {
@@ -10271,7 +11057,7 @@ ${orchestrationNotes.join("\n")}
10271
11057
  citations: allCitations,
10272
11058
  schemaHash: composeVaultSchema(
10273
11059
  schemas.root,
10274
- uniqueStrings2(stepPages.flatMap((page) => page.projectIds).sort((left, right) => left.localeCompare(right))).map((projectId) => schemas.projects[projectId]).filter((schema) => Boolean(schema?.hash))
11060
+ uniqueStrings3(stepPages.flatMap((page) => page.projectIds).sort((left, right) => left.localeCompare(right))).map((projectId) => schemas.projects[projectId]).filter((schema) => Boolean(schema?.hash))
10275
11061
  ).hash,
10276
11062
  outputFormat,
10277
11063
  outputAssets: hubAssetBundle.outputAssets,
@@ -10316,9 +11102,9 @@ ${orchestrationNotes.join("\n")}
10316
11102
  approvalId = approval.approvalId;
10317
11103
  approvalDir = approval.approvalDir;
10318
11104
  stepResults.forEach((result, index) => {
10319
- result.stagedPath = path17.join(approval.approvalDir, "wiki", stagedStepPages[index]?.page.path ?? "");
11105
+ result.stagedPath = path18.join(approval.approvalDir, "wiki", stagedStepPages[index]?.page.path ?? "");
10320
11106
  });
10321
- stagedHubPath = path17.join(approval.approvalDir, "wiki", hubPage.path);
11107
+ stagedHubPath = path18.join(approval.approvalDir, "wiki", hubPage.path);
10322
11108
  } else {
10323
11109
  await refreshVaultAfterOutputSave(rootDir);
10324
11110
  }
@@ -10331,7 +11117,7 @@ ${orchestrationNotes.join("\n")}
10331
11117
  providerId: provider.id,
10332
11118
  success: true,
10333
11119
  relatedSourceIds: [...relatedSourceIds],
10334
- relatedPageIds: uniqueStrings2([...relatedPageIds, ...stepPages.map((page) => page.id), hubPage.id]),
11120
+ relatedPageIds: uniqueStrings3([...relatedPageIds, ...stepPages.map((page) => page.id), hubPage.id]),
10335
11121
  relatedNodeIds: [...relatedNodeIds],
10336
11122
  citations: allCitations,
10337
11123
  tokenUsage: tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0 ? {
@@ -10386,7 +11172,7 @@ async function queryGraphVault(rootDir, question, options = {}) {
10386
11172
  return queryGraph(graph, question, searchResults, options);
10387
11173
  }
10388
11174
  async function benchmarkVault(rootDir, options = {}) {
10389
- const { paths } = await loadVaultConfig(rootDir);
11175
+ const { config, paths } = await loadVaultConfig(rootDir);
10390
11176
  const graph = await ensureCompiledGraph(rootDir);
10391
11177
  const manifests = await listManifests(rootDir);
10392
11178
  const pageContentsById = /* @__PURE__ */ new Map();
@@ -10398,15 +11184,17 @@ async function benchmarkVault(rootDir, options = {}) {
10398
11184
  }
10399
11185
  }
10400
11186
  for (const page of graph.pages) {
10401
- const absolutePath = path17.join(paths.wikiDir, page.path);
11187
+ const absolutePath = path18.join(paths.wikiDir, page.path);
10402
11188
  if (!await fileExists(absolutePath)) {
10403
11189
  continue;
10404
11190
  }
10405
- const parsed = matter8(await fs14.readFile(absolutePath, "utf8"));
11191
+ const parsed = matter9(await fs15.readFile(absolutePath, "utf8"));
10406
11192
  pageContentsById.set(page.id, parsed.content);
10407
11193
  }
11194
+ const configuredQuestions = (config.benchmark?.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
11195
+ const maxQuestions = Math.max(1, options.maxQuestions ?? config.benchmark?.maxQuestions ?? 3);
10408
11196
  const questions = (options.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
10409
- const sampleQuestions = questions.length ? questions : [...DEFAULT_BENCHMARK_QUESTIONS];
11197
+ const sampleQuestions = (questions.length ? questions : configuredQuestions.length ? configuredQuestions : defaultBenchmarkQuestionsForGraph(graph, maxQuestions)).slice(0, maxQuestions);
10410
11198
  const perQuestion = sampleQuestions.map((question) => {
10411
11199
  const searchResults = searchPages(paths.searchDbPath, question, { limit: 12 });
10412
11200
  const result = queryGraph(graph, question, searchResults, { budget: 12 });
@@ -10416,6 +11204,7 @@ async function benchmarkVault(rootDir, options = {}) {
10416
11204
  queryTokens: metrics.queryTokens,
10417
11205
  reduction: metrics.reduction,
10418
11206
  visitedNodeIds: result.visitedNodeIds,
11207
+ visitedEdgeIds: result.visitedEdgeIds,
10419
11208
  pageIds: result.pageIds
10420
11209
  };
10421
11210
  });
@@ -10448,15 +11237,15 @@ async function listPages(rootDir) {
10448
11237
  }
10449
11238
  async function readPage(rootDir, relativePath) {
10450
11239
  const { paths } = await loadVaultConfig(rootDir);
10451
- const absolutePath = path17.resolve(paths.wikiDir, relativePath);
11240
+ const absolutePath = path18.resolve(paths.wikiDir, relativePath);
10452
11241
  if (!absolutePath.startsWith(paths.wikiDir) || !await fileExists(absolutePath)) {
10453
11242
  return null;
10454
11243
  }
10455
- const raw = await fs14.readFile(absolutePath, "utf8");
10456
- const parsed = matter8(raw);
11244
+ const raw = await fs15.readFile(absolutePath, "utf8");
11245
+ const parsed = matter9(raw);
10457
11246
  return {
10458
11247
  path: relativePath,
10459
- title: typeof parsed.data.title === "string" ? parsed.data.title : path17.basename(relativePath, path17.extname(relativePath)),
11248
+ title: typeof parsed.data.title === "string" ? parsed.data.title : path18.basename(relativePath, path18.extname(relativePath)),
10460
11249
  frontmatter: parsed.data,
10461
11250
  content: parsed.content
10462
11251
  };
@@ -10492,7 +11281,7 @@ function structuralLintFindings(_rootDir, paths, graph, schemas, manifests, sour
10492
11281
  severity: "warning",
10493
11282
  code: "stale_page",
10494
11283
  message: `Page ${page.title} is stale because the vault schema changed.`,
10495
- pagePath: path17.join(paths.wikiDir, page.path),
11284
+ pagePath: path18.join(paths.wikiDir, page.path),
10496
11285
  relatedPageIds: [page.id]
10497
11286
  });
10498
11287
  }
@@ -10503,7 +11292,7 @@ function structuralLintFindings(_rootDir, paths, graph, schemas, manifests, sour
10503
11292
  severity: "warning",
10504
11293
  code: "stale_page",
10505
11294
  message: `Page ${page.title} is stale because source ${sourceId} changed.`,
10506
- pagePath: path17.join(paths.wikiDir, page.path),
11295
+ pagePath: path18.join(paths.wikiDir, page.path),
10507
11296
  relatedSourceIds: [sourceId],
10508
11297
  relatedPageIds: [page.id]
10509
11298
  });
@@ -10514,13 +11303,13 @@ function structuralLintFindings(_rootDir, paths, graph, schemas, manifests, sour
10514
11303
  severity: "info",
10515
11304
  code: "orphan_page",
10516
11305
  message: `Page ${page.title} has no backlinks.`,
10517
- pagePath: path17.join(paths.wikiDir, page.path),
11306
+ pagePath: path18.join(paths.wikiDir, page.path),
10518
11307
  relatedPageIds: [page.id]
10519
11308
  });
10520
11309
  }
10521
- const absolutePath = path17.join(paths.wikiDir, page.path);
11310
+ const absolutePath = path18.join(paths.wikiDir, page.path);
10522
11311
  if (await fileExists(absolutePath)) {
10523
- const content = await fs14.readFile(absolutePath, "utf8");
11312
+ const content = await fs15.readFile(absolutePath, "utf8");
10524
11313
  if (content.includes("## Claims")) {
10525
11314
  const uncited = content.split("\n").filter((line) => line.startsWith("- ") && !line.includes("[source:"));
10526
11315
  if (uncited.length) {
@@ -10580,7 +11369,7 @@ async function lintVault(rootDir, options = {}) {
10580
11369
  providerId: provider?.id,
10581
11370
  success: true,
10582
11371
  relatedPageIds: graph.pages.map((page) => page.id),
10583
- relatedSourceIds: uniqueStrings2(graph.pages.flatMap((page) => page.sourceIds)),
11372
+ relatedSourceIds: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
10584
11373
  lintFindingCount: findings.length,
10585
11374
  lines: [`findings=${findings.length}`, `deep=${Boolean(options.deep)}`, `web=${Boolean(options.web)}`]
10586
11375
  });
@@ -10600,7 +11389,7 @@ async function bootstrapDemo(rootDir, input) {
10600
11389
  }
10601
11390
 
10602
11391
  // src/mcp.ts
10603
- var SERVER_VERSION = "0.1.21";
11392
+ var SERVER_VERSION = "0.1.22";
10604
11393
  async function createMcpServer(rootDir) {
10605
11394
  const server = new McpServer({
10606
11395
  name: "swarmvault",
@@ -10622,8 +11411,8 @@ async function createMcpServer(rootDir) {
10622
11411
  {
10623
11412
  description: "Search compiled wiki pages using the local full-text index.",
10624
11413
  inputSchema: {
10625
- query: z7.string().min(1).describe("Search query"),
10626
- limit: z7.number().int().min(1).max(25).optional().describe("Maximum number of results")
11414
+ query: z8.string().min(1).describe("Search query"),
11415
+ limit: z8.number().int().min(1).max(25).optional().describe("Maximum number of results")
10627
11416
  }
10628
11417
  },
10629
11418
  async ({ query, limit }) => {
@@ -10636,7 +11425,7 @@ async function createMcpServer(rootDir) {
10636
11425
  {
10637
11426
  description: "Read a generated wiki page by its path relative to wiki/.",
10638
11427
  inputSchema: {
10639
- path: z7.string().min(1).describe("Path relative to wiki/, for example sources/example.md")
11428
+ path: z8.string().min(1).describe("Path relative to wiki/, for example sources/example.md")
10640
11429
  }
10641
11430
  },
10642
11431
  async ({ path: relativePath }) => {
@@ -10652,7 +11441,7 @@ async function createMcpServer(rootDir) {
10652
11441
  {
10653
11442
  description: "List source manifests in the current workspace.",
10654
11443
  inputSchema: {
10655
- limit: z7.number().int().min(1).max(100).optional().describe("Maximum number of manifests to return")
11444
+ limit: z8.number().int().min(1).max(100).optional().describe("Maximum number of manifests to return")
10656
11445
  }
10657
11446
  },
10658
11447
  async ({ limit }) => {
@@ -10665,9 +11454,9 @@ async function createMcpServer(rootDir) {
10665
11454
  {
10666
11455
  description: "Traverse the local graph from search seeds without calling a model provider.",
10667
11456
  inputSchema: {
10668
- question: z7.string().min(1).describe("Question or graph search seed"),
10669
- traversal: z7.enum(["bfs", "dfs"]).optional().describe("Traversal strategy"),
10670
- budget: z7.number().int().min(3).max(50).optional().describe("Maximum nodes to summarize")
11457
+ question: z8.string().min(1).describe("Question or graph search seed"),
11458
+ traversal: z8.enum(["bfs", "dfs"]).optional().describe("Traversal strategy"),
11459
+ budget: z8.number().int().min(3).max(50).optional().describe("Maximum nodes to summarize")
10671
11460
  }
10672
11461
  },
10673
11462
  async ({ question, traversal, budget }) => {
@@ -10683,7 +11472,7 @@ async function createMcpServer(rootDir) {
10683
11472
  {
10684
11473
  description: "Explain a graph node, its page, community, and neighbors.",
10685
11474
  inputSchema: {
10686
- target: z7.string().min(1).describe("Node or page label/id")
11475
+ target: z8.string().min(1).describe("Node or page label/id")
10687
11476
  }
10688
11477
  },
10689
11478
  async ({ target }) => {
@@ -10695,7 +11484,7 @@ async function createMcpServer(rootDir) {
10695
11484
  {
10696
11485
  description: "Return the neighbors of a graph node or page target.",
10697
11486
  inputSchema: {
10698
- target: z7.string().min(1).describe("Node or page label/id")
11487
+ target: z8.string().min(1).describe("Node or page label/id")
10699
11488
  }
10700
11489
  },
10701
11490
  async ({ target }) => {
@@ -10708,8 +11497,8 @@ async function createMcpServer(rootDir) {
10708
11497
  {
10709
11498
  description: "Find the shortest graph path between two targets.",
10710
11499
  inputSchema: {
10711
- from: z7.string().min(1).describe("Start node/page label or id"),
10712
- to: z7.string().min(1).describe("End node/page label or id")
11500
+ from: z8.string().min(1).describe("Start node/page label or id"),
11501
+ to: z8.string().min(1).describe("End node/page label or id")
10713
11502
  }
10714
11503
  },
10715
11504
  async ({ from, to }) => {
@@ -10721,7 +11510,7 @@ async function createMcpServer(rootDir) {
10721
11510
  {
10722
11511
  description: "List the highest-connectivity graph nodes.",
10723
11512
  inputSchema: {
10724
- limit: z7.number().int().min(1).max(25).optional().describe("Maximum nodes to return")
11513
+ limit: z8.number().int().min(1).max(25).optional().describe("Maximum nodes to return")
10725
11514
  }
10726
11515
  },
10727
11516
  async ({ limit }) => {
@@ -10733,9 +11522,9 @@ async function createMcpServer(rootDir) {
10733
11522
  {
10734
11523
  description: "Ask a question against the compiled vault and optionally save the answer.",
10735
11524
  inputSchema: {
10736
- question: z7.string().min(1).describe("Question to ask the vault"),
10737
- save: z7.boolean().optional().describe("Persist the answer to wiki/outputs"),
10738
- format: z7.enum(["markdown", "report", "slides", "chart", "image"]).optional().describe("Output format")
11525
+ question: z8.string().min(1).describe("Question to ask the vault"),
11526
+ save: z8.boolean().optional().describe("Persist the answer to wiki/outputs"),
11527
+ format: z8.enum(["markdown", "report", "slides", "chart", "image"]).optional().describe("Output format")
10739
11528
  }
10740
11529
  },
10741
11530
  async ({ question, save, format }) => {
@@ -10752,7 +11541,7 @@ async function createMcpServer(rootDir) {
10752
11541
  {
10753
11542
  description: "Ingest a local file path or URL into the SwarmVault workspace.",
10754
11543
  inputSchema: {
10755
- input: z7.string().min(1).describe("Local path or URL to ingest")
11544
+ input: z8.string().min(1).describe("Local path or URL to ingest")
10756
11545
  }
10757
11546
  },
10758
11547
  async ({ input }) => {
@@ -10765,7 +11554,7 @@ async function createMcpServer(rootDir) {
10765
11554
  {
10766
11555
  description: "Compile source manifests into wiki pages, graph data, and search index.",
10767
11556
  inputSchema: {
10768
- approve: z7.boolean().optional().describe("Stage a review bundle without applying active page changes")
11557
+ approve: z8.boolean().optional().describe("Stage a review bundle without applying active page changes")
10769
11558
  }
10770
11559
  },
10771
11560
  async ({ approve }) => {
@@ -10849,7 +11638,7 @@ async function createMcpServer(rootDir) {
10849
11638
  },
10850
11639
  async () => {
10851
11640
  const { paths } = await loadVaultConfig(rootDir);
10852
- const files = (await listFilesRecursive(paths.sessionsDir)).filter((filePath) => filePath.endsWith(".md")).map((filePath) => toPosix(path18.relative(paths.sessionsDir, filePath))).sort();
11641
+ const files = (await listFilesRecursive(paths.sessionsDir)).filter((filePath) => filePath.endsWith(".md")).map((filePath) => toPosix(path19.relative(paths.sessionsDir, filePath))).sort();
10853
11642
  return asTextResource("swarmvault://sessions", JSON.stringify(files, null, 2));
10854
11643
  }
10855
11644
  );
@@ -10882,8 +11671,8 @@ async function createMcpServer(rootDir) {
10882
11671
  return asTextResource(`swarmvault://pages/${encodedPath}`, `Page not found: ${relativePath}`);
10883
11672
  }
10884
11673
  const { paths } = await loadVaultConfig(rootDir);
10885
- const absolutePath = path18.resolve(paths.wikiDir, relativePath);
10886
- return asTextResource(`swarmvault://pages/${encodedPath}`, await fs15.readFile(absolutePath, "utf8"));
11674
+ const absolutePath = path19.resolve(paths.wikiDir, relativePath);
11675
+ return asTextResource(`swarmvault://pages/${encodedPath}`, await fs16.readFile(absolutePath, "utf8"));
10887
11676
  }
10888
11677
  );
10889
11678
  server.registerResource(
@@ -10891,11 +11680,11 @@ async function createMcpServer(rootDir) {
10891
11680
  new ResourceTemplate("swarmvault://sessions/{path}", {
10892
11681
  list: async () => {
10893
11682
  const { paths } = await loadVaultConfig(rootDir);
10894
- const files = (await listFilesRecursive(paths.sessionsDir)).filter((filePath) => filePath.endsWith(".md")).map((filePath) => toPosix(path18.relative(paths.sessionsDir, filePath))).sort();
11683
+ const files = (await listFilesRecursive(paths.sessionsDir)).filter((filePath) => filePath.endsWith(".md")).map((filePath) => toPosix(path19.relative(paths.sessionsDir, filePath))).sort();
10895
11684
  return {
10896
11685
  resources: files.map((relativePath) => ({
10897
11686
  uri: `swarmvault://sessions/${encodeURIComponent(relativePath)}`,
10898
- name: path18.basename(relativePath, ".md"),
11687
+ name: path19.basename(relativePath, ".md"),
10899
11688
  title: relativePath,
10900
11689
  description: "SwarmVault session artifact",
10901
11690
  mimeType: "text/markdown"
@@ -10912,11 +11701,11 @@ async function createMcpServer(rootDir) {
10912
11701
  const { paths } = await loadVaultConfig(rootDir);
10913
11702
  const encodedPath = typeof variables.path === "string" ? variables.path : "";
10914
11703
  const relativePath = decodeURIComponent(encodedPath);
10915
- const absolutePath = path18.resolve(paths.sessionsDir, relativePath);
11704
+ const absolutePath = path19.resolve(paths.sessionsDir, relativePath);
10916
11705
  if (!absolutePath.startsWith(paths.sessionsDir) || !await fileExists(absolutePath)) {
10917
11706
  return asTextResource(`swarmvault://sessions/${encodedPath}`, `Session not found: ${relativePath}`);
10918
11707
  }
10919
- return asTextResource(`swarmvault://sessions/${encodedPath}`, await fs15.readFile(absolutePath, "utf8"));
11708
+ return asTextResource(`swarmvault://sessions/${encodedPath}`, await fs16.readFile(absolutePath, "utf8"));
10920
11709
  }
10921
11710
  );
10922
11711
  return server;
@@ -10964,13 +11753,13 @@ function asTextResource(uri, text) {
10964
11753
  }
10965
11754
 
10966
11755
  // src/schedule.ts
10967
- import fs16 from "fs/promises";
10968
- import path19 from "path";
11756
+ import fs17 from "fs/promises";
11757
+ import path20 from "path";
10969
11758
  function scheduleStatePath(schedulesDir, jobId) {
10970
- return path19.join(schedulesDir, `${encodeURIComponent(jobId)}.json`);
11759
+ return path20.join(schedulesDir, `${encodeURIComponent(jobId)}.json`);
10971
11760
  }
10972
11761
  function scheduleLockPath(schedulesDir, jobId) {
10973
- return path19.join(schedulesDir, `${encodeURIComponent(jobId)}.lock`);
11762
+ return path20.join(schedulesDir, `${encodeURIComponent(jobId)}.lock`);
10974
11763
  }
10975
11764
  function parseEveryDuration(value) {
10976
11765
  const match = value.trim().match(/^(\d+)(m|h|d)$/i);
@@ -11073,13 +11862,13 @@ async function acquireJobLease(rootDir, jobId) {
11073
11862
  const { paths } = await loadVaultConfig(rootDir);
11074
11863
  const leasePath = scheduleLockPath(paths.schedulesDir, jobId);
11075
11864
  await ensureDir(paths.schedulesDir);
11076
- const handle = await fs16.open(leasePath, "wx");
11865
+ const handle = await fs17.open(leasePath, "wx");
11077
11866
  await handle.writeFile(`${process.pid}
11078
11867
  ${(/* @__PURE__ */ new Date()).toISOString()}
11079
11868
  `);
11080
11869
  await handle.close();
11081
11870
  return async () => {
11082
- await fs16.rm(leasePath, { force: true });
11871
+ await fs17.rm(leasePath, { force: true });
11083
11872
  };
11084
11873
  }
11085
11874
  async function listSchedules(rootDir) {
@@ -11227,15 +12016,15 @@ async function serveSchedules(rootDir, pollMs = 3e4) {
11227
12016
 
11228
12017
  // src/viewer.ts
11229
12018
  import { execFile } from "child_process";
11230
- import fs17 from "fs/promises";
12019
+ import fs18 from "fs/promises";
11231
12020
  import http from "http";
11232
- import path21 from "path";
12021
+ import path22 from "path";
11233
12022
  import { promisify } from "util";
11234
- import matter9 from "gray-matter";
12023
+ import matter10 from "gray-matter";
11235
12024
  import mime2 from "mime-types";
11236
12025
 
11237
12026
  // src/watch.ts
11238
- import path20 from "path";
12027
+ import path21 from "path";
11239
12028
  import process2 from "process";
11240
12029
  import chokidar from "chokidar";
11241
12030
  var MAX_BACKOFF_MS = 3e4;
@@ -11243,15 +12032,15 @@ var BACKOFF_THRESHOLD = 3;
11243
12032
  var CRITICAL_THRESHOLD = 10;
11244
12033
  var REPO_WATCH_IGNORES = /* @__PURE__ */ new Set([".git", "node_modules", "dist", "build", ".next", "coverage", ".venv", "vendor", "target"]);
11245
12034
  function withinRoot2(rootPath, targetPath) {
11246
- const relative = path20.relative(rootPath, targetPath);
11247
- return relative === "" || !relative.startsWith("..") && !path20.isAbsolute(relative);
12035
+ const relative = path21.relative(rootPath, targetPath);
12036
+ return relative === "" || !relative.startsWith("..") && !path21.isAbsolute(relative);
11248
12037
  }
11249
12038
  function hasIgnoredRepoSegment(baseDir, targetPath) {
11250
- const relativePath = path20.relative(baseDir, targetPath);
12039
+ const relativePath = path21.relative(baseDir, targetPath);
11251
12040
  if (!relativePath || relativePath.startsWith("..")) {
11252
12041
  return false;
11253
12042
  }
11254
- return relativePath.split(path20.sep).some((segment) => REPO_WATCH_IGNORES.has(segment));
12043
+ return relativePath.split(path21.sep).some((segment) => REPO_WATCH_IGNORES.has(segment));
11255
12044
  }
11256
12045
  function workspaceIgnoreRoots(rootDir, paths) {
11257
12046
  return [
@@ -11260,16 +12049,16 @@ function workspaceIgnoreRoots(rootDir, paths) {
11260
12049
  paths.stateDir,
11261
12050
  paths.agentDir,
11262
12051
  paths.inboxDir,
11263
- path20.join(rootDir, ".claude"),
11264
- path20.join(rootDir, ".cursor"),
11265
- path20.join(rootDir, ".obsidian")
11266
- ].map((candidate) => path20.resolve(candidate));
12052
+ path21.join(rootDir, ".claude"),
12053
+ path21.join(rootDir, ".cursor"),
12054
+ path21.join(rootDir, ".obsidian")
12055
+ ].map((candidate) => path21.resolve(candidate));
11267
12056
  }
11268
12057
  async function resolveWatchTargets(rootDir, paths, options) {
11269
- const targets = /* @__PURE__ */ new Set([path20.resolve(paths.inboxDir)]);
12058
+ const targets = /* @__PURE__ */ new Set([path21.resolve(paths.inboxDir)]);
11270
12059
  if (options.repo) {
11271
12060
  for (const repoRoot of await listTrackedRepoRoots(rootDir)) {
11272
- targets.add(path20.resolve(repoRoot));
12061
+ targets.add(path21.resolve(repoRoot));
11273
12062
  }
11274
12063
  }
11275
12064
  return [...targets].sort((left, right) => left.localeCompare(right));
@@ -11399,7 +12188,7 @@ async function watchVault(rootDir, options = {}) {
11399
12188
  const { paths } = await initWorkspace(rootDir);
11400
12189
  const baseDebounceMs = options.debounceMs ?? 900;
11401
12190
  const ignoredRoots = workspaceIgnoreRoots(rootDir, paths);
11402
- const inboxWatchRoot = path20.resolve(paths.inboxDir);
12191
+ const inboxWatchRoot = path21.resolve(paths.inboxDir);
11403
12192
  let watchTargets = await resolveWatchTargets(rootDir, paths, options);
11404
12193
  let timer;
11405
12194
  let running = false;
@@ -11413,7 +12202,7 @@ async function watchVault(rootDir, options = {}) {
11413
12202
  usePolling: true,
11414
12203
  interval: 100,
11415
12204
  ignored: (targetPath) => {
11416
- const absolutePath = path20.resolve(targetPath);
12205
+ const absolutePath = path21.resolve(targetPath);
11417
12206
  const primaryTarget = watchTargets.filter((watchTarget) => withinRoot2(watchTarget, absolutePath)).sort((left, right) => right.length - left.length)[0] ?? null;
11418
12207
  if (!primaryTarget) {
11419
12208
  return false;
@@ -11585,8 +12374,8 @@ async function watchVault(rootDir, options = {}) {
11585
12374
  }
11586
12375
  };
11587
12376
  const reasonForPath = (targetPath) => {
11588
- const baseDir = watchTargets.filter((watchTarget) => withinRoot2(watchTarget, path20.resolve(targetPath))).sort((left, right) => right.length - left.length)[0] ?? paths.inboxDir;
11589
- return path20.relative(baseDir, targetPath) || ".";
12377
+ const baseDir = watchTargets.filter((watchTarget) => withinRoot2(watchTarget, path21.resolve(targetPath))).sort((left, right) => right.length - left.length)[0] ?? paths.inboxDir;
12378
+ return path21.relative(baseDir, targetPath) || ".";
11590
12379
  };
11591
12380
  watcher.on("add", (filePath) => schedule(`add:${reasonForPath(filePath)}`)).on("change", (filePath) => schedule(`change:${reasonForPath(filePath)}`)).on("unlink", (filePath) => schedule(`unlink:${reasonForPath(filePath)}`)).on("addDir", (dirPath) => schedule(`addDir:${reasonForPath(dirPath)}`)).on("unlinkDir", (dirPath) => schedule(`unlinkDir:${reasonForPath(dirPath)}`)).on("error", (caught) => schedule(`error:${caught instanceof Error ? caught.message : String(caught)}`));
11592
12381
  await new Promise((resolve, reject) => {
@@ -11627,15 +12416,15 @@ async function getWatchStatus(rootDir) {
11627
12416
  var execFileAsync = promisify(execFile);
11628
12417
  async function readViewerPage(rootDir, relativePath) {
11629
12418
  const { paths } = await loadVaultConfig(rootDir);
11630
- const absolutePath = path21.resolve(paths.wikiDir, relativePath);
12419
+ const absolutePath = path22.resolve(paths.wikiDir, relativePath);
11631
12420
  if (!absolutePath.startsWith(paths.wikiDir) || !await fileExists(absolutePath)) {
11632
12421
  return null;
11633
12422
  }
11634
- const raw = await fs17.readFile(absolutePath, "utf8");
11635
- const parsed = matter9(raw);
12423
+ const raw = await fs18.readFile(absolutePath, "utf8");
12424
+ const parsed = matter10(raw);
11636
12425
  return {
11637
12426
  path: relativePath,
11638
- title: typeof parsed.data.title === "string" ? parsed.data.title : path21.basename(relativePath, path21.extname(relativePath)),
12427
+ title: typeof parsed.data.title === "string" ? parsed.data.title : path22.basename(relativePath, path22.extname(relativePath)),
11639
12428
  frontmatter: parsed.data,
11640
12429
  content: parsed.content,
11641
12430
  assets: normalizeOutputAssets(parsed.data.output_assets)
@@ -11643,12 +12432,12 @@ async function readViewerPage(rootDir, relativePath) {
11643
12432
  }
11644
12433
  async function readViewerAsset(rootDir, relativePath) {
11645
12434
  const { paths } = await loadVaultConfig(rootDir);
11646
- const absolutePath = path21.resolve(paths.wikiDir, relativePath);
12435
+ const absolutePath = path22.resolve(paths.wikiDir, relativePath);
11647
12436
  if (!absolutePath.startsWith(paths.wikiDir) || !await fileExists(absolutePath)) {
11648
12437
  return null;
11649
12438
  }
11650
12439
  return {
11651
- buffer: await fs17.readFile(absolutePath),
12440
+ buffer: await fs18.readFile(absolutePath),
11652
12441
  mimeType: mime2.lookup(absolutePath) || "application/octet-stream"
11653
12442
  };
11654
12443
  }
@@ -11671,12 +12460,12 @@ async function readJsonBody(request) {
11671
12460
  return JSON.parse(raw);
11672
12461
  }
11673
12462
  async function ensureViewerDist(viewerDistDir) {
11674
- const indexPath = path21.join(viewerDistDir, "index.html");
12463
+ const indexPath = path22.join(viewerDistDir, "index.html");
11675
12464
  if (await fileExists(indexPath)) {
11676
12465
  return;
11677
12466
  }
11678
- const viewerProjectDir = path21.dirname(viewerDistDir);
11679
- if (await fileExists(path21.join(viewerProjectDir, "package.json"))) {
12467
+ const viewerProjectDir = path22.dirname(viewerDistDir);
12468
+ if (await fileExists(path22.join(viewerProjectDir, "package.json"))) {
11680
12469
  await execFileAsync("pnpm", ["build"], { cwd: viewerProjectDir });
11681
12470
  }
11682
12471
  }
@@ -11693,7 +12482,7 @@ async function startGraphServer(rootDir, port) {
11693
12482
  return;
11694
12483
  }
11695
12484
  response.writeHead(200, { "content-type": "application/json" });
11696
- response.end(await fs17.readFile(paths.graphPath, "utf8"));
12485
+ response.end(await fs18.readFile(paths.graphPath, "utf8"));
11697
12486
  return;
11698
12487
  }
11699
12488
  if (url.pathname === "/api/graph/query") {
@@ -11735,16 +12524,29 @@ async function startGraphServer(rootDir, port) {
11735
12524
  const kind = url.searchParams.get("kind") ?? "all";
11736
12525
  const status = url.searchParams.get("status") ?? "all";
11737
12526
  const project = url.searchParams.get("project") ?? "all";
12527
+ const sourceType = url.searchParams.get("sourceType") ?? "all";
11738
12528
  const results = searchPages(paths.searchDbPath, query, {
11739
12529
  limit: Number.isFinite(limit) ? limit : 10,
11740
12530
  kind,
11741
12531
  status,
11742
- project
12532
+ project,
12533
+ sourceType
11743
12534
  });
11744
12535
  response.writeHead(200, { "content-type": "application/json" });
11745
12536
  response.end(JSON.stringify(results));
11746
12537
  return;
11747
12538
  }
12539
+ if (url.pathname === "/api/graph-report") {
12540
+ const reportPath = path22.join(paths.wikiDir, "graph", "report.json");
12541
+ if (!await fileExists(reportPath)) {
12542
+ response.writeHead(404, { "content-type": "application/json" });
12543
+ response.end(JSON.stringify({ error: "Graph report artifact not found. Run `swarmvault compile` first." }));
12544
+ return;
12545
+ }
12546
+ response.writeHead(200, { "content-type": "application/json" });
12547
+ response.end(await fs18.readFile(reportPath, "utf8"));
12548
+ return;
12549
+ }
11748
12550
  if (url.pathname === "/api/watch-status") {
11749
12551
  response.writeHead(200, { "content-type": "application/json" });
11750
12552
  response.end(JSON.stringify(await getWatchStatus(rootDir)));
@@ -11825,8 +12627,8 @@ async function startGraphServer(rootDir, port) {
11825
12627
  return;
11826
12628
  }
11827
12629
  const relativePath = url.pathname === "/" ? "index.html" : url.pathname.slice(1);
11828
- const target = path21.join(paths.viewerDistDir, relativePath);
11829
- const fallback = path21.join(paths.viewerDistDir, "index.html");
12630
+ const target = path22.join(paths.viewerDistDir, relativePath);
12631
+ const fallback = path22.join(paths.viewerDistDir, "index.html");
11830
12632
  const filePath = await fileExists(target) ? target : fallback;
11831
12633
  if (!await fileExists(filePath)) {
11832
12634
  response.writeHead(503, { "content-type": "text/plain" });
@@ -11834,7 +12636,7 @@ async function startGraphServer(rootDir, port) {
11834
12636
  return;
11835
12637
  }
11836
12638
  response.writeHead(200, { "content-type": mime2.lookup(filePath) || "text/plain" });
11837
- response.end(await fs17.readFile(filePath));
12639
+ response.end(await fs18.readFile(filePath));
11838
12640
  });
11839
12641
  await new Promise((resolve) => {
11840
12642
  server.listen(effectivePort, resolve);
@@ -11861,7 +12663,7 @@ async function exportGraphHtml(rootDir, outputPath) {
11861
12663
  throw new Error("Graph artifact not found. Run `swarmvault compile` first.");
11862
12664
  }
11863
12665
  await ensureViewerDist(paths.viewerDistDir);
11864
- const indexPath = path21.join(paths.viewerDistDir, "index.html");
12666
+ const indexPath = path22.join(paths.viewerDistDir, "index.html");
11865
12667
  if (!await fileExists(indexPath)) {
11866
12668
  throw new Error("Viewer build not found. Run `pnpm build` first.");
11867
12669
  }
@@ -11874,6 +12676,7 @@ async function exportGraphHtml(rootDir, outputPath) {
11874
12676
  title: loaded.title,
11875
12677
  kind: page.kind,
11876
12678
  status: page.status,
12679
+ sourceType: page.sourceType,
11877
12680
  projectIds: page.projectIds,
11878
12681
  content: loaded.content,
11879
12682
  assets: await Promise.all(
@@ -11885,17 +12688,18 @@ async function exportGraphHtml(rootDir, outputPath) {
11885
12688
  } : null;
11886
12689
  })
11887
12690
  );
11888
- const rawHtml = await fs17.readFile(indexPath, "utf8");
12691
+ const rawHtml = await fs18.readFile(indexPath, "utf8");
11889
12692
  const scriptMatch = rawHtml.match(/<script type="module" crossorigin src="([^"]+)"><\/script>/);
11890
12693
  const styleMatch = rawHtml.match(/<link rel="stylesheet" crossorigin href="([^"]+)">/);
11891
- const scriptPath = scriptMatch?.[1] ? path21.join(paths.viewerDistDir, scriptMatch[1].replace(/^\//, "")) : null;
11892
- const stylePath = styleMatch?.[1] ? path21.join(paths.viewerDistDir, styleMatch[1].replace(/^\//, "")) : null;
12694
+ const scriptPath = scriptMatch?.[1] ? path22.join(paths.viewerDistDir, scriptMatch[1].replace(/^\//, "")) : null;
12695
+ const stylePath = styleMatch?.[1] ? path22.join(paths.viewerDistDir, styleMatch[1].replace(/^\//, "")) : null;
11893
12696
  if (!scriptPath || !await fileExists(scriptPath)) {
11894
12697
  throw new Error("Viewer script bundle not found. Run `pnpm build` first.");
11895
12698
  }
11896
- const script = await fs17.readFile(scriptPath, "utf8");
11897
- const style = stylePath && await fileExists(stylePath) ? await fs17.readFile(stylePath, "utf8") : "";
11898
- const embeddedData = JSON.stringify({ graph, pages: pages.filter(Boolean) }, null, 2).replace(/</g, "\\u003c");
12699
+ const script = await fs18.readFile(scriptPath, "utf8");
12700
+ const style = stylePath && await fileExists(stylePath) ? await fs18.readFile(stylePath, "utf8") : "";
12701
+ const report = await readJsonFile(path22.join(paths.wikiDir, "graph", "report.json"));
12702
+ const embeddedData = JSON.stringify({ graph, pages: pages.filter(Boolean), report }, null, 2).replace(/</g, "\\u003c");
11899
12703
  const html = [
11900
12704
  "<!doctype html>",
11901
12705
  '<html lang="en">',
@@ -11913,9 +12717,9 @@ async function exportGraphHtml(rootDir, outputPath) {
11913
12717
  "</html>",
11914
12718
  ""
11915
12719
  ].filter(Boolean).join("\n");
11916
- await fs17.mkdir(path21.dirname(outputPath), { recursive: true });
11917
- await fs17.writeFile(outputPath, html, "utf8");
11918
- return path21.resolve(outputPath);
12720
+ await fs18.mkdir(path22.dirname(outputPath), { recursive: true });
12721
+ await fs18.writeFile(outputPath, html, "utf8");
12722
+ return path22.resolve(outputPath);
11919
12723
  }
11920
12724
  export {
11921
12725
  acceptApproval,