@swarmvaultai/engine 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -3
- package/dist/chunk-6UPHDGEB.js +1073 -0
- package/dist/index.d.ts +123 -2
- package/dist/index.js +1422 -618
- package/dist/registry-6KZMA3XM.js +12 -0
- package/dist/viewer/assets/index-f8JPYMw_.js +330 -0
- package/dist/viewer/index.html +1 -1
- package/dist/viewer/lib.d.ts +52 -1
- package/dist/viewer/lib.js +23 -4
- package/package.json +2 -1
- package/dist/viewer/assets/index-DEETVhXx.js +0 -330
package/dist/index.js
CHANGED
|
@@ -21,7 +21,7 @@ import {
|
|
|
21
21
|
uniqueBy,
|
|
22
22
|
writeFileIfChanged,
|
|
23
23
|
writeJsonFile
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-6UPHDGEB.js";
|
|
25
25
|
|
|
26
26
|
// src/agents.ts
|
|
27
27
|
import fs from "fs/promises";
|
|
@@ -593,9 +593,10 @@ async function uninstallGitHooks(rootDir) {
|
|
|
593
593
|
}
|
|
594
594
|
|
|
595
595
|
// src/ingest.ts
|
|
596
|
-
import
|
|
597
|
-
import
|
|
596
|
+
import fs9 from "fs/promises";
|
|
597
|
+
import path9 from "path";
|
|
598
598
|
import { Readability } from "@mozilla/readability";
|
|
599
|
+
import matter3 from "gray-matter";
|
|
599
600
|
import ignore from "ignore";
|
|
600
601
|
import { JSDOM } from "jsdom";
|
|
601
602
|
import mime from "mime-types";
|
|
@@ -2728,9 +2729,10 @@ async function analyzeCodeSource(manifest, extractedText, schemaHash) {
|
|
|
2728
2729
|
const language = manifest.language ?? inferCodeLanguage(manifest.originalPath ?? manifest.storedPath, manifest.mimeType) ?? "typescript";
|
|
2729
2730
|
const { code, rationales } = language === "javascript" || language === "jsx" || language === "typescript" || language === "tsx" ? analyzeTypeScriptLikeCode(manifest, extractedText) : await analyzeTreeSitterCode(manifest, extractedText, language);
|
|
2730
2731
|
return {
|
|
2731
|
-
analysisVersion:
|
|
2732
|
+
analysisVersion: 5,
|
|
2732
2733
|
sourceId: manifest.sourceId,
|
|
2733
2734
|
sourceHash: manifest.contentHash,
|
|
2735
|
+
extractionHash: manifest.extractionHash,
|
|
2734
2736
|
schemaHash,
|
|
2735
2737
|
title: manifest.title,
|
|
2736
2738
|
summary: summarizeModule(manifest, code),
|
|
@@ -2744,19 +2746,247 @@ async function analyzeCodeSource(manifest, extractedText, schemaHash) {
|
|
|
2744
2746
|
};
|
|
2745
2747
|
}
|
|
2746
2748
|
|
|
2747
|
-
// src/
|
|
2749
|
+
// src/extraction.ts
|
|
2748
2750
|
import fs6 from "fs/promises";
|
|
2751
|
+
import os from "os";
|
|
2749
2752
|
import path6 from "path";
|
|
2753
|
+
import { z } from "zod";
|
|
2754
|
+
// Structured-output schema handed to the vision provider for a single image.
// Concepts and entities share one item shape (required non-empty name,
// description defaulting to ""), so that shape is produced by a small factory.
var imageVisionExtractionSchema = (() => {
  const namedItem = () =>
    z.object({
      name: z.string().min(1),
      description: z.string().default("")
    });
  const claimItem = z.object({
    text: z.string().min(1),
    confidence: z.number().min(0).max(1).default(0.65),
    polarity: z.enum(["positive", "negative", "neutral"]).default("neutral")
  });
  return z.object({
    title: z.string().min(1).nullable().optional(),
    summary: z.string().min(1),
    text: z.string().default(""),
    // List sizes are capped: at most 12 concepts/entities, 8 claims, 6 questions.
    concepts: z.array(namedItem()).max(12).default([]),
    entities: z.array(namedItem()).max(12).default([]),
    claims: z.array(claimItem).max(8).default([]),
    questions: z.array(z.string().min(1)).max(6).default([])
  });
})();
|
|
2779
|
+
/**
 * Builds the base record shared by every extraction artifact.
 *
 * @param sourceKind Kind of source that was extracted (e.g. "image", "pdf").
 * @param mimeType MIME type of the extracted source.
 * @param extractor Identifier of the extractor that produced the artifact.
 * @returns `{ extractor, sourceKind, mimeType, producedAt }`, where `producedAt`
 *          is the current time as an ISO-8601 string.
 */
function extractionMetadata(sourceKind, mimeType, extractor) {
  const producedAt = new Date().toISOString();
  // Key order is kept stable: artifacts are serialised with JSON.stringify elsewhere.
  return { extractor, sourceKind, mimeType, producedAt };
}
|
|
2787
|
+
/**
 * Hashes the outcome of an extraction so later runs can detect changes.
 *
 * @param extractedText Extracted text, if any.
 * @param artifact Extraction artifact, if any.
 * @returns `undefined` when both inputs are falsy; otherwise the `sha256` of a
 *          JSON document containing the text and the artifact.
 */
function buildExtractionHash(extractedText, artifact) {
  const hasText = Boolean(extractedText);
  const hasArtifact = Boolean(artifact);
  if (!hasText && !hasArtifact) {
    return void 0;
  }
  let hashableArtifact = null;
  if (hasArtifact) {
    // `producedAt` is blanked (JSON.stringify drops undefined values) so the
    // hash does not change merely because the extraction was re-run later.
    hashableArtifact = { ...artifact, producedAt: void 0 };
  }
  const payload = {
    extractedText: extractedText ?? null,
    artifact: hashableArtifact
  };
  return sha256(JSON.stringify(payload));
}
|
|
2802
|
+
/**
 * Creates the extraction artifact for sources handled by the "plain_text" extractor.
 *
 * @param sourceKind Kind of the source being recorded.
 * @param mimeType MIME type of the source.
 * @returns The metadata record produced by `extractionMetadata`.
 */
function createPlainTextExtractionArtifact(sourceKind, mimeType) {
  const extractor = "plain_text";
  const artifact = extractionMetadata(sourceKind, mimeType, extractor);
  return artifact;
}
|
|
2805
|
+
/**
 * Creates the extraction artifact for sources handled by the "html_readability" extractor.
 *
 * @param sourceKind Kind of the source being recorded.
 * @param mimeType MIME type of the source.
 * @returns The metadata record produced by `extractionMetadata`.
 */
function createHtmlReadabilityExtractionArtifact(sourceKind, mimeType) {
  const extractor = "html_readability";
  const artifact = extractionMetadata(sourceKind, mimeType, extractor);
  return artifact;
}
|
|
2808
|
+
/**
 * Flattens a parsed vision payload into a markdown-ish text body.
 *
 * Sections appear in the order summary, transcribed text, bulleted claims;
 * empty sections are omitted and the rest are separated by a blank line.
 *
 * @param payload Object with `summary` and `text` strings and a `claims` array
 *                of `{ text }` items.
 * @returns The joined, trimmed text ("" when every section is empty).
 */
function normalizeVisionMarkdown(payload) {
  const summary = payload.summary.trim();
  const transcript = payload.text.trim();
  const claimBullets = payload.claims.length
    ? payload.claims.map((claim) => `- ${claim.text}`).join("\n")
    : "";
  return [summary, transcript, claimBullets]
    .filter((section) => section.length > 0)
    .join("\n\n")
    .trim();
}
|
|
2821
|
+
/**
 * Ensures an image attachment exists on disk so a provider can read it by path.
 *
 * - When `input.filePath` is set it is returned unchanged with a no-op cleanup.
 * - Otherwise `input.bytes` is written to `source.<ext>` inside a fresh temp
 *   directory, and `cleanup` removes that directory.
 *
 * @param input Object with `filePath` or `bytes`, plus `mimeType`.
 * @returns `{ filePath, cleanup }`; callers are expected to await `cleanup()`.
 * @throws Error when neither a file path nor bytes are provided; rethrows any
 *         filesystem error from creating or writing the temp file.
 */
async function materializeAttachmentPath(input) {
  if (input.filePath) {
    return {
      filePath: input.filePath,
      cleanup: async () => {
      }
    };
  }
  if (!input.bytes) {
    throw new Error("Image extraction requires a file path or bytes.");
  }
  // Derive the extension before touching the filesystem. `||` (not `??`) so an
  // empty subtype such as "image/" or "image/+xml" falls back to "bin".
  const extension = (input.mimeType ?? "").split("/")[1]?.split("+")[0] || "bin";
  const tempDir = await fs6.mkdtemp(path6.join(os.tmpdir(), "swarmvault-image-extract-"));
  const removeTempDir = async () => {
    await fs6.rm(tempDir, { recursive: true, force: true });
  };
  const tempPath = path6.join(tempDir, `source.${extension}`);
  try {
    await fs6.writeFile(tempPath, input.bytes);
  } catch (error) {
    // The caller never receives `cleanup` on failure, so remove the directory
    // here. Removal is best-effort so the original write error is the one surfaced.
    await removeTempDir().catch(() => {
    });
    throw error;
  }
  return {
    filePath: tempPath,
    cleanup: removeTempDir
  };
}
|
|
2843
|
+
/**
 * Extracts grounded notes from one image using the configured vision provider.
 *
 * Provider and generation failures do not throw: they return an artifact whose
 * `warnings` array describes the problem and no `extractedText`. Errors raised
 * by `materializeAttachmentPath` are not caught here and propagate to the caller.
 *
 * @param rootDir Workspace root passed to `getProviderForTask`.
 * @param input Object providing `mimeType`, `title`, and whatever
 *              `materializeAttachmentPath` needs to produce a file path.
 * @returns `{ artifact }` on failure, or `{ title, extractedText, artifact }` on success.
 */
async function extractImageWithVision(rootDir, input) {
  let provider;
  try {
    provider = await getProviderForTask(rootDir, "visionProvider");
  } catch (error) {
    // No usable provider: report it as a warning instead of failing the caller.
    return {
      artifact: {
        ...extractionMetadata("image", input.mimeType, "image_vision"),
        warnings: [`Vision extraction unavailable: ${error instanceof Error ? error.message : "provider not configured"}`]
      }
    };
  }
  // The provider must be non-heuristic and advertise both "vision" and "structured".
  if (provider.type === "heuristic" || !provider.capabilities.has("vision") || !provider.capabilities.has("structured")) {
    return {
      artifact: {
        ...extractionMetadata("image", input.mimeType, "image_vision"),
        warnings: [`Vision extraction unavailable for provider ${provider.id}. Configure a structured multimodal provider.`]
      }
    };
  }
  const attachment = await materializeAttachmentPath(input);
  try {
    const parsed = await provider.generateStructured(
      {
        system: [
          "You extract grounded notes from a single image for a local-first knowledge vault.",
          "Only describe content that is actually visible.",
          "If the image contains text, transcribe it accurately.",
          "If the image is a diagram or screenshot, summarize the key visible relationships and labels without speculation."
        ].join("\n"),
        prompt: [
          `Source title: ${input.title}`,
          "Return structured extraction for this image.",
          "Include a concise summary, OCR-style text, grounded concepts/entities, visible claims, and follow-up questions."
        ].join("\n"),
        attachments: [{ mimeType: input.mimeType, filePath: attachment.filePath }]
      },
      imageVisionExtractionSchema
    );
    // Keep the full structured result on the artifact alongside provider identity.
    const artifact = {
      ...extractionMetadata("image", input.mimeType, "image_vision"),
      providerId: provider.id,
      providerModel: provider.model,
      vision: {
        title: parsed.title ?? void 0,
        summary: parsed.summary,
        text: parsed.text,
        concepts: parsed.concepts,
        entities: parsed.entities,
        claims: parsed.claims,
        questions: parsed.questions
      }
    };
    return {
      title: parsed.title ?? void 0,
      extractedText: normalizeVisionMarkdown(parsed),
      artifact
    };
  } catch (error) {
    // Generation or validation failed: record a truncated message as a warning.
    return {
      artifact: {
        ...extractionMetadata("image", input.mimeType, "image_vision"),
        providerId: provider.id,
        providerModel: provider.model,
        warnings: [`Vision extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  } finally {
    // Runs on both success and failure so the attachment's cleanup always executes.
    await attachment.cleanup();
  }
}
|
|
2914
|
+
/**
 * Reduces a raw PDF info dictionary to its non-empty string fields.
 *
 * @param raw Value to normalise; anything falsy or not an object is rejected.
 * @returns An object mapping each string-valued key to its whitespace-normalised
 *          value (blank results dropped), or `undefined` when nothing remains.
 */
function normalizePdfMetadata(raw) {
  const isRecord = Boolean(raw) && typeof raw === "object";
  if (!isRecord) {
    return void 0;
  }
  const cleanedFields = {};
  for (const [key, value] of Object.entries(raw)) {
    if (typeof value !== "string") {
      continue;
    }
    const cleaned = normalizeWhitespace(value);
    if (!cleaned) {
      continue;
    }
    cleanedFields[key] = cleaned;
  }
  const hasFields = Object.keys(cleanedFields).length > 0;
  return hasFields ? cleanedFields : void 0;
}
|
|
2929
|
+
/**
 * Extracts plain text and basic metadata from a PDF using pdf.js.
 *
 * Never throws: any failure is reported as an artifact with a `warnings` entry.
 * The pdf.js loading task is always destroyed, including when opening the
 * document or reading a page fails, so its resources are not left behind.
 *
 * @param input Object with `bytes` (the PDF content) and `mimeType`.
 * @returns `{ extractedText, artifact }`; `extractedText` is `undefined` when
 *          the PDF produced no text (the artifact then carries a warning).
 */
async function extractPdfText(input) {
  try {
    const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
    const task = pdfjs.getDocument({
      data: new Uint8Array(input.bytes),
      useWorkerFetch: false,
      isEvalSupported: false,
      disableFontFace: true,
      verbosity: 0
    });
    const pageTexts = [];
    let pageCount;
    let metadataResult;
    try {
      const document = await task.promise;
      // Captured up front so nothing is read from the document after destroy().
      pageCount = document.numPages;
      for (let pageNumber = 1; pageNumber <= pageCount; pageNumber += 1) {
        const page = await document.getPage(pageNumber);
        try {
          const textContent = await page.getTextContent();
          const pageText = normalizeWhitespace(
            textContent.items.map((item) => typeof item === "object" && item && "str" in item && typeof item.str === "string" ? item.str : "").join(" ")
          );
          if (pageText) {
            pageTexts.push(pageText);
          }
        } finally {
          page.cleanup();
        }
      }
      // Metadata is optional; a failure to read it must not fail the extraction.
      metadataResult = await document.getMetadata().catch(() => null);
    } finally {
      await task.destroy();
    }
    const extractedText = pageTexts.join("\n\n").trim();
    const artifact = {
      ...extractionMetadata("pdf", input.mimeType, "pdf_text"),
      pageCount,
      metadata: normalizePdfMetadata(metadataResult?.info)
    };
    if (!extractedText) {
      artifact.warnings = ["PDF text extraction completed but produced no extractable text."];
    }
    return {
      extractedText: extractedText || void 0,
      artifact
    };
  } catch (error) {
    return {
      artifact: {
        ...extractionMetadata("pdf", input.mimeType, "pdf_text"),
        warnings: [`PDF text extraction failed: ${error instanceof Error ? truncate(error.message, 240) : "unknown error"}`]
      }
    };
  }
}
|
|
2976
|
+
|
|
2977
|
+
// src/logs.ts
|
|
2978
|
+
import fs7 from "fs/promises";
|
|
2979
|
+
import path7 from "path";
|
|
2750
2980
|
import matter from "gray-matter";
|
|
2751
2981
|
async function resolveUniqueSessionPath(rootDir, operation, title, startedAt) {
|
|
2752
2982
|
const { paths } = await initWorkspace(rootDir);
|
|
2753
2983
|
await ensureDir(paths.sessionsDir);
|
|
2754
2984
|
const timestamp = startedAt.replace(/[:.]/g, "-");
|
|
2755
2985
|
const baseName = `${timestamp}-${operation}-${slugify(title)}`;
|
|
2756
|
-
let candidate =
|
|
2986
|
+
let candidate = path7.join(paths.sessionsDir, `${baseName}.md`);
|
|
2757
2987
|
let counter = 2;
|
|
2758
2988
|
while (await fileExists(candidate)) {
|
|
2759
|
-
candidate =
|
|
2989
|
+
candidate = path7.join(paths.sessionsDir, `${baseName}-${counter}.md`);
|
|
2760
2990
|
counter++;
|
|
2761
2991
|
}
|
|
2762
2992
|
return candidate;
|
|
@@ -2764,11 +2994,11 @@ async function resolveUniqueSessionPath(rootDir, operation, title, startedAt) {
|
|
|
2764
2994
|
/**
 * Appends one entry to `<wikiDir>/log.md`, creating the file with a "# Log"
 * header when it does not exist yet.
 *
 * @param rootDir Workspace root passed to `initWorkspace`.
 * @param action Action label shown in the entry heading.
 * @param title Title shown in the entry heading.
 * @param lines Optional detail lines, each rendered as a "- " bullet.
 */
async function appendLogEntry(rootDir, action, title, lines = []) {
  const { paths } = await initWorkspace(rootDir);
  await ensureDir(paths.wikiDir);
  const logPath = path7.join(paths.wikiDir, "log.md");
  // "YYYY-MM-DD HH:MM:SS" taken from the ISO string (so it is in UTC).
  const timestamp = new Date().toISOString().slice(0, 19).replace("T", " ");
  const heading = `## [${timestamp}] ${action} | ${title}`;
  const bullets = lines.map((line) => `- ${line}`);
  const entry = [heading, ...bullets, ""].join("\n");
  let existing = "# Log\n\n";
  if (await fileExists(logPath)) {
    existing = await fs7.readFile(logPath, "utf8");
  }
  await fs7.writeFile(logPath, `${existing}${entry}\n`, "utf8");
}
|
|
2774
3004
|
async function recordSession(rootDir, input) {
|
|
@@ -2778,8 +3008,8 @@ async function recordSession(rootDir, input) {
|
|
|
2778
3008
|
const finishedAtIso = new Date(input.finishedAt ?? input.startedAt).toISOString();
|
|
2779
3009
|
const durationMs = Math.max(0, new Date(finishedAtIso).getTime() - new Date(startedAtIso).getTime());
|
|
2780
3010
|
const sessionPath = await resolveUniqueSessionPath(rootDir, input.operation, input.title, startedAtIso);
|
|
2781
|
-
const sessionId =
|
|
2782
|
-
const relativeSessionPath =
|
|
3011
|
+
const sessionId = path7.basename(sessionPath, ".md");
|
|
3012
|
+
const relativeSessionPath = path7.relative(rootDir, sessionPath).split(path7.sep).join(path7.posix.sep);
|
|
2783
3013
|
const frontmatter = Object.fromEntries(
|
|
2784
3014
|
Object.entries({
|
|
2785
3015
|
session_id: sessionId,
|
|
@@ -2827,7 +3057,7 @@ async function recordSession(rootDir, input) {
|
|
|
2827
3057
|
frontmatter
|
|
2828
3058
|
);
|
|
2829
3059
|
await writeFileIfChanged(sessionPath, content);
|
|
2830
|
-
const logPath =
|
|
3060
|
+
const logPath = path7.join(paths.wikiDir, "log.md");
|
|
2831
3061
|
const timestamp = startedAtIso.slice(0, 19).replace("T", " ");
|
|
2832
3062
|
const entry = [
|
|
2833
3063
|
`## [${timestamp}] ${input.operation} | ${input.title}`,
|
|
@@ -2835,8 +3065,8 @@ async function recordSession(rootDir, input) {
|
|
|
2835
3065
|
...(input.lines ?? []).map((line) => `- ${line}`),
|
|
2836
3066
|
""
|
|
2837
3067
|
].join("\n");
|
|
2838
|
-
const existing = await fileExists(logPath) ? await
|
|
2839
|
-
await
|
|
3068
|
+
const existing = await fileExists(logPath) ? await fs7.readFile(logPath, "utf8") : "# Log\n\n";
|
|
3069
|
+
await fs7.writeFile(logPath, `${existing}${entry}
|
|
2840
3070
|
`, "utf8");
|
|
2841
3071
|
return { sessionPath, sessionId };
|
|
2842
3072
|
}
|
|
@@ -2846,8 +3076,8 @@ async function appendWatchRun(rootDir, run) {
|
|
|
2846
3076
|
}
|
|
2847
3077
|
|
|
2848
3078
|
// src/watch-state.ts
|
|
2849
|
-
import
|
|
2850
|
-
import
|
|
3079
|
+
import fs8 from "fs/promises";
|
|
3080
|
+
import path8 from "path";
|
|
2851
3081
|
import matter2 from "gray-matter";
|
|
2852
3082
|
function pendingEntryKey(entry) {
|
|
2853
3083
|
return entry.path;
|
|
@@ -2861,7 +3091,7 @@ function normalizeRelativePath(rootDir, filePath) {
|
|
|
2861
3091
|
if (!filePath) {
|
|
2862
3092
|
return void 0;
|
|
2863
3093
|
}
|
|
2864
|
-
return toPosix(
|
|
3094
|
+
return toPosix(path8.relative(rootDir, path8.resolve(filePath)));
|
|
2865
3095
|
}
|
|
2866
3096
|
async function readPendingSemanticRefresh(rootDir) {
|
|
2867
3097
|
const { paths } = await initWorkspace(rootDir);
|
|
@@ -2955,11 +3185,11 @@ async function markPagesStaleForSources(rootDir, sourceIds) {
|
|
|
2955
3185
|
if (page.freshness !== "stale" || !page.sourceIds.some((sourceId) => affectedSourceIds.has(sourceId))) {
|
|
2956
3186
|
continue;
|
|
2957
3187
|
}
|
|
2958
|
-
const absolutePath =
|
|
3188
|
+
const absolutePath = path8.join(paths.wikiDir, page.path);
|
|
2959
3189
|
if (!await fileExists(absolutePath)) {
|
|
2960
3190
|
continue;
|
|
2961
3191
|
}
|
|
2962
|
-
const raw = await
|
|
3192
|
+
const raw = await fs8.readFile(absolutePath, "utf8");
|
|
2963
3193
|
const parsed = matter2(raw);
|
|
2964
3194
|
if (parsed.data.freshness === "stale") {
|
|
2965
3195
|
continue;
|
|
@@ -2975,6 +3205,9 @@ async function markPagesStaleForSources(rootDir, sourceIds) {
|
|
|
2975
3205
|
// Fallback for the `maxAssetSize` ingest option (10 * 1024 * 1024; presumably bytes, i.e. 10 MiB — TODO confirm unit).
var DEFAULT_MAX_ASSET_SIZE = 10 * 1024 * 1024;
// Fallback for the `maxFiles` ingest option.
var DEFAULT_MAX_DIRECTORY_FILES = 5e3;
// Names of VCS, dependency and build-output directories; presumably skipped
// when walking a repository — usage is outside this view, verify against callers.
var BUILT_IN_REPO_IGNORES = /* @__PURE__ */ new Set([".git", "node_modules", "dist", "build", ".next", "coverage", ".venv", "vendor", "target"]);
|
|
3208
|
+
/**
 * Removes falsy entries and duplicates from a list, keeping first-seen order.
 *
 * @param values Array of values (typically strings).
 * @returns A new array of the distinct truthy values.
 */
function uniqueStrings(values) {
  const seen = new Set();
  values.forEach((value) => {
    if (value) {
      seen.add(value);
    }
  });
  return Array.from(seen);
}
|
|
2978
3211
|
function inferKind(mimeType, filePath) {
|
|
2979
3212
|
if (inferCodeLanguage(filePath, mimeType)) {
|
|
2980
3213
|
return "code";
|
|
@@ -3007,7 +3240,7 @@ function normalizeIngestOptions(options) {
|
|
|
3007
3240
|
return {
|
|
3008
3241
|
includeAssets: options?.includeAssets ?? true,
|
|
3009
3242
|
maxAssetSize: Math.max(0, Math.floor(options?.maxAssetSize ?? DEFAULT_MAX_ASSET_SIZE)),
|
|
3010
|
-
repoRoot: options?.repoRoot ?
|
|
3243
|
+
repoRoot: options?.repoRoot ? path9.resolve(options.repoRoot) : void 0,
|
|
3011
3244
|
include: (options?.include ?? []).map((pattern) => pattern.trim()).filter(Boolean),
|
|
3012
3245
|
exclude: (options?.exclude ?? []).map((pattern) => pattern.trim()).filter(Boolean),
|
|
3013
3246
|
maxFiles: Math.max(1, Math.floor(options?.maxFiles ?? DEFAULT_MAX_DIRECTORY_FILES)),
|
|
@@ -3016,27 +3249,27 @@ function normalizeIngestOptions(options) {
|
|
|
3016
3249
|
}
|
|
3017
3250
|
/**
 * Tests a relative path against a list of glob patterns.
 *
 * A pattern matches when it matches either the whole relative path or just the
 * path's final segment (taken with POSIX separators).
 *
 * @param relativePath Path to test.
 * @param patterns Array of glob patterns.
 * @returns `true` as soon as any pattern matches, otherwise `false`.
 */
function matchesAnyGlob(relativePath, patterns) {
  for (const pattern of patterns) {
    if (path9.matchesGlob(relativePath, pattern)) {
      return true;
    }
    const fileName = path9.posix.basename(relativePath);
    if (path9.matchesGlob(fileName, pattern)) {
      return true;
    }
  }
  return false;
}
|
|
3022
3255
|
/**
 * Tells whether a source kind is accepted (every kind except "binary").
 *
 * @param sourceKind Source kind label.
 * @returns `false` only for "binary".
 */
function supportedDirectoryKind(sourceKind) {
  const isBinary = sourceKind === "binary";
  return !isBinary;
}
|
|
3025
3258
|
async function findNearestGitRoot2(startPath) {
|
|
3026
|
-
let current =
|
|
3259
|
+
let current = path9.resolve(startPath);
|
|
3027
3260
|
try {
|
|
3028
|
-
const stat = await
|
|
3261
|
+
const stat = await fs9.stat(current);
|
|
3029
3262
|
if (!stat.isDirectory()) {
|
|
3030
|
-
current =
|
|
3263
|
+
current = path9.dirname(current);
|
|
3031
3264
|
}
|
|
3032
3265
|
} catch {
|
|
3033
|
-
current =
|
|
3266
|
+
current = path9.dirname(current);
|
|
3034
3267
|
}
|
|
3035
3268
|
while (true) {
|
|
3036
|
-
if (await fileExists(
|
|
3269
|
+
if (await fileExists(path9.join(current, ".git"))) {
|
|
3037
3270
|
return current;
|
|
3038
3271
|
}
|
|
3039
|
-
const parent =
|
|
3272
|
+
const parent = path9.dirname(current);
|
|
3040
3273
|
if (parent === current) {
|
|
3041
3274
|
return null;
|
|
3042
3275
|
}
|
|
@@ -3044,26 +3277,26 @@ async function findNearestGitRoot2(startPath) {
|
|
|
3044
3277
|
}
|
|
3045
3278
|
}
|
|
3046
3279
|
/**
 * Tells whether `targetPath` is `rootPath` itself or located beneath it.
 *
 * @param rootPath Directory acting as the root.
 * @param targetPath Path to test.
 * @returns `true` when the target does not escape the root.
 */
function withinRoot(rootPath, targetPath) {
  const relative = path9.relative(rootPath, targetPath);
  if (relative === "") {
    return true;
  }
  // An absolute result means there is no relative route (e.g. another drive on Windows).
  if (path9.isAbsolute(relative)) {
    return false;
  }
  // Only a leading ".." *segment* escapes the root. A plain startsWith("..")
  // would also reject legitimate children such as "..cache/file".
  const escapesRoot = relative === ".." || relative.startsWith(`..${path9.sep}`);
  return !escapesRoot;
}
|
|
3050
3283
|
/**
 * Reconstructs a repository root from a file manifest by walking up from the
 * file's directory once per directory segment of its repo-relative path.
 *
 * @param manifest Manifest with `originType`, `originalPath` and `repoRelativePath`.
 * @returns The absolute repo root, or `null` when the manifest is not a file
 *          origin or lacks either path.
 */
function repoRootFromManifest(manifest) {
  const { originType, originalPath, repoRelativePath } = manifest;
  if (originType !== "file") {
    return null;
  }
  if (!originalPath || !repoRelativePath) {
    return null;
  }
  const repoDir = path9.posix.dirname(repoRelativePath);
  const fileDir = path9.dirname(path9.resolve(originalPath));
  // A file sitting directly in the repo root: its directory is the root.
  if (!repoDir || repoDir === ".") {
    return fileDir;
  }
  const depth = repoDir.split("/").filter(Boolean).length;
  const climb = new Array(depth).fill("..");
  return path9.resolve(fileDir, ...climb);
}
|
|
3062
3295
|
/**
 * Computes the POSIX-style path of a file relative to its repository root.
 *
 * @param absolutePath Absolute path of the file.
 * @param repoRoot Repository root, if known.
 * @returns The relative path, or `undefined` when there is no root, the file
 *          is outside it, or the file is the root itself.
 */
function repoRelativePathFor(absolutePath, repoRoot) {
  if (!repoRoot || !withinRoot(repoRoot, absolutePath)) {
    return void 0;
  }
  const relative = toPosix(path9.relative(repoRoot, absolutePath));
  // Only a leading ".." segment means "outside the root"; names that merely
  // begin with two dots (e.g. "..cache/a.txt") are valid children.
  const escapesRoot = relative === ".." || relative.startsWith("../");
  return relative && !escapesRoot ? relative : void 0;
}
|
|
3069
3302
|
function normalizeOriginUrl(input) {
|
|
@@ -3092,6 +3325,22 @@ function arxivIdFromInput(input) {
|
|
|
3092
3325
|
return null;
|
|
3093
3326
|
}
|
|
3094
3327
|
}
|
|
3328
|
+
/**
 * Extracts a DOI from user input.
 *
 * Accepts either a bare DOI ("10.<registrant>/<suffix>", no whitespace) or a
 * URL whose host is exactly doi.org or dx.doi.org.
 *
 * @param input Raw user input.
 * @returns The DOI string, or `null` when none can be determined.
 */
function doiFromInput(input) {
  const doiShape = /^10\.\S+\/\S+$/i;
  const candidate = input.trim();
  if (doiShape.test(candidate)) {
    // The pattern admits no whitespace, so the candidate is already clean.
    return candidate;
  }
  try {
    const { hostname, pathname } = new URL(candidate);
    const isDoiHost = hostname === "doi.org" || hostname === "dx.doi.org";
    if (!isDoiHost) {
      return null;
    }
    const decoded = decodeURIComponent(pathname.replace(/^\/+/, ""));
    return doiShape.test(decoded) ? decoded : null;
  } catch {
    // Not a parseable URL, or the path contained a malformed percent-escape.
    return null;
  }
}
|
|
3095
3344
|
function isTweetUrl(input) {
|
|
3096
3345
|
try {
|
|
3097
3346
|
const url = new URL(input);
|
|
@@ -3101,26 +3350,25 @@ function isTweetUrl(input) {
|
|
|
3101
3350
|
}
|
|
3102
3351
|
}
|
|
3103
3352
|
/**
 * Renders a frontmatter block as an array of lines (ending with one empty line).
 *
 * Entries are dropped when they are empty arrays, blank strings, or otherwise
 * falsy; the remaining entries are serialised with gray-matter.
 *
 * @param value Object of frontmatter fields.
 * @returns Lines of the serialised frontmatter followed by "".
 */
function markdownFrontmatter(value) {
  const hasContent = (rawValue) => {
    if (Array.isArray(rawValue)) {
      return rawValue.length > 0;
    }
    if (typeof rawValue === "string") {
      return Boolean(rawValue.trim());
    }
    return Boolean(rawValue);
  };
  const keptEntries = Object.entries(value).filter(([, rawValue]) => hasContent(rawValue));
  const serialized = matter3.stringify("", Object.fromEntries(keptEntries)).trimEnd();
  return [...serialized.split("\n"), ""];
}
|
|
3114
3360
|
/**
 * Wraps captured markdown as a prepared URL-origin ingest input.
 *
 * @param input Object with `title`, `sourceType`, `url`, `markdown`,
 *              `attachments` and `logDetails`.
 * @returns A prepared-input record whose payload and extracted text are both
 *          the given markdown, with the URL passed through `normalizeOriginUrl`.
 */
function prepareCapturedMarkdownInput(input) {
  const { title, sourceType, url, markdown, attachments, logDetails } = input;
  const payloadBytes = Buffer.from(markdown, "utf8");
  return {
    title,
    originType: "url",
    sourceKind: "markdown",
    sourceType,
    url: normalizeOriginUrl(url),
    mimeType: "text/markdown",
    storedExtension: ".md",
    payloadBytes,
    extractedText: markdown,
    attachments,
    logDetails
  };
}
|
|
@@ -3131,6 +3379,17 @@ async function fetchText(url) {
|
|
|
3131
3379
|
}
|
|
3132
3380
|
return response.text();
|
|
3133
3381
|
}
|
|
3382
|
+
/**
 * Fetches a URL and returns its body together with the post-redirect URL and
 * the response media type.
 *
 * @param url URL to fetch.
 * @returns `{ text, finalUrl, contentType }`. `finalUrl` is the normalised
 *          response URL (falling back to `url`); `contentType` is the
 *          lower-cased media type without parameters, defaulting to "text/html".
 * @throws Error when the response status is not OK.
 */
async function fetchResolvedText(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
  }
  const text = await response.text();
  // Media types are case-insensitive; lower-casing lets callers compare with
  // plain string checks even when a server sends e.g. "Text/HTML".
  const mediaType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase();
  return {
    text,
    finalUrl: normalizeOriginUrl(response.url || url),
    contentType: mediaType || "text/html"
  };
}
|
|
3134
3393
|
function domTextFromHtml(html, baseUrl) {
|
|
3135
3394
|
const dom = new JSDOM(`<body>${html}</body>`, { url: baseUrl });
|
|
3136
3395
|
return normalizeWhitespace(dom.window.document.body.textContent ?? "");
|
|
@@ -3150,11 +3409,16 @@ async function captureArxivMarkdown(input, options) {
|
|
|
3150
3409
|
const authors = [...document.querySelectorAll('meta[name="citation_author"]')].map((node) => node.getAttribute("content")?.trim()).filter((value) => Boolean(value));
|
|
3151
3410
|
const authorsText = authors.join(", ") || stripLeadingLabel(document.querySelector(".authors")?.textContent?.trim() ?? "", "Authors:");
|
|
3152
3411
|
const abstract = stripLeadingLabel(document.querySelector("blockquote.abstract")?.textContent?.trim() ?? "", "Abstract:");
|
|
3412
|
+
const categories = [...document.querySelectorAll(".subheader .primary-subject, .metatable .tablecell.subjects")].flatMap((node) => (node.textContent ?? "").split(/;/g)).map((value) => value.trim()).filter(Boolean);
|
|
3153
3413
|
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
3154
3414
|
const markdown = [
|
|
3155
3415
|
...markdownFrontmatter({
|
|
3156
|
-
|
|
3416
|
+
source_type: "arxiv",
|
|
3157
3417
|
source_url: normalizedUrl,
|
|
3418
|
+
canonical_url: normalizedUrl,
|
|
3419
|
+
title,
|
|
3420
|
+
authors,
|
|
3421
|
+
tags: uniqueStrings(categories),
|
|
3158
3422
|
arxiv_id: arxivId,
|
|
3159
3423
|
author: options.author,
|
|
3160
3424
|
contributor: options.contributor,
|
|
@@ -3194,8 +3458,11 @@ async function captureTweetMarkdown(input, options) {
|
|
|
3194
3458
|
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
3195
3459
|
const markdown = [
|
|
3196
3460
|
...markdownFrontmatter({
|
|
3197
|
-
|
|
3461
|
+
source_type: "tweet",
|
|
3198
3462
|
source_url: normalizedUrl,
|
|
3463
|
+
canonical_url: canonicalUrl,
|
|
3464
|
+
title,
|
|
3465
|
+
authors: postAuthor ? [postAuthor] : void 0,
|
|
3199
3466
|
author: options.author,
|
|
3200
3467
|
contributor: options.contributor,
|
|
3201
3468
|
captured_at: capturedAt
|
|
@@ -3217,6 +3484,101 @@ async function captureTweetMarkdown(input, options) {
|
|
|
3217
3484
|
].join("\n");
|
|
3218
3485
|
return { title, normalizedUrl, markdown };
|
|
3219
3486
|
}
|
|
3487
|
+
/**
 * Returns the first non-blank `content` attribute found by trying each
 * selector in order.
 *
 * @param document DOM document to query.
 * @param selectors CSS selectors, in priority order.
 * @returns The trimmed attribute value, or `undefined` when no selector yields one.
 */
function firstMetaContent(document, selectors) {
  for (const selector of selectors) {
    const node = document.querySelector(selector);
    const content = node?.getAttribute("content")?.trim();
    if (!content) {
      continue;
    }
    return content;
  }
  return void 0;
}
|
|
3496
|
+
/**
 * Collects the non-blank `content` attributes of every element matched by any
 * of the selectors, de-duplicated via `uniqueStrings`.
 *
 * @param document DOM document to query.
 * @param selectors CSS selectors to evaluate in order.
 * @returns Distinct trimmed attribute values in first-seen order.
 */
function metaContents(document, selectors) {
  const collected = [];
  for (const selector of selectors) {
    for (const node of document.querySelectorAll(selector)) {
      const content = node.getAttribute("content")?.trim() ?? "";
      if (content) {
        collected.push(content);
      }
    }
  }
  return uniqueStrings(collected);
}
|
|
3503
|
+
/**
 * Splits a keyword string on commas and semicolons.
 *
 * @param value Keyword string; `null`/`undefined` is treated as "".
 * @returns Distinct, trimmed, non-empty keywords (via `uniqueStrings`).
 */
function splitKeywords(value) {
  const source = value ?? "";
  const keywords = [];
  for (const piece of source.split(/[;,]/g)) {
    const keyword = piece.trim();
    if (keyword) {
      keywords.push(keyword);
    }
  }
  return uniqueStrings(keywords);
}
|
|
3508
|
+
/**
 * Captures a web article as markdown with a metadata frontmatter block.
 *
 * The page is fetched once to read its metadata (canonical URL, title, authors,
 * dates, tags, DOI); the body text comes from `prepareUrlInput` run against the
 * canonical URL.
 *
 * @param rootDir Workspace root passed to `prepareUrlInput`.
 * @param input URL to capture.
 * @param options Ingest options; `author` and `contributor` are copied into the frontmatter.
 * @param extra Optional `{ sourceType, sourceUrl, doi }` overrides; defaults to an "article".
 * @returns `{ title, normalizedUrl, markdown, attachments }`.
 * @throws Error when the response is not HTML or the prepared input is neither markdown nor text.
 */
async function captureArticleMarkdown(rootDir, input, options, extra = { sourceType: "article" }) {
  const page = await fetchResolvedText(input);
  if (!page.contentType.includes("html")) {
    throw new Error(`Unsupported article content type: ${page.contentType}`);
  }
  const { document } = new JSDOM(page.text, { url: page.finalUrl }).window;
  const metaFirst = (selectors) => firstMetaContent(document, selectors);
  const metaAll = (selectors) => metaContents(document, selectors);

  // Prefer the page's declared canonical link, resolved against the final URL.
  const canonicalHref = document.querySelector('link[rel="canonical"]')?.getAttribute("href")?.trim();
  let canonicalUrl = page.finalUrl;
  if (canonicalHref) {
    canonicalUrl = normalizeOriginUrl(new URL(canonicalHref, page.finalUrl).toString());
  }

  const metaTitle = metaFirst(['meta[name="citation_title"]', 'meta[property="og:title"]', 'meta[name="twitter:title"]']);
  const title = metaTitle ?? (document.title.trim() || canonicalUrl);
  const citationAuthors = metaAll(['meta[name="citation_author"]']);
  const genericAuthors = metaAll(['meta[name="author"]', 'meta[property="article:author"]']);
  const authors = uniqueStrings([...citationAuthors, ...genericAuthors]);
  const publishedAt = metaFirst([
    'meta[name="citation_publication_date"]',
    'meta[name="citation_online_date"]',
    'meta[property="article:published_time"]',
    'meta[name="pubdate"]'
  ]);
  const updatedAt = metaFirst(['meta[property="article:modified_time"]', 'meta[name="lastmod"]']);
  const tagList = metaAll(['meta[property="article:tag"]']);
  const keywordList = splitKeywords(metaFirst(['meta[name="keywords"]']));
  const tags = uniqueStrings([...tagList, ...keywordList]);

  // An explicit DOI wins; otherwise fall back to page metadata, minus any "doi:" prefix.
  let inferredDoi = extra.doi;
  if (inferredDoi == null) {
    inferredDoi = metaFirst(['meta[name="citation_doi"]', 'meta[name="dc.identifier"]'])?.replace(/^doi:\s*/i, "") ?? void 0;
  }

  const normalizedOptions = normalizeIngestOptions(options);
  const prepared = await prepareUrlInput(rootDir, canonicalUrl, normalizedOptions);
  const isTextual = prepared.sourceKind === "markdown" || prepared.sourceKind === "text";
  if (!isTextual) {
    throw new Error(`Unsupported prepared article kind: ${prepared.sourceKind}`);
  }
  const body = prepared.extractedText ?? prepared.payloadBytes.toString("utf8");
  const capturedAt = new Date().toISOString();

  const frontmatterLines = markdownFrontmatter({
    source_type: extra.sourceType,
    source_url: extra.sourceUrl ?? input,
    canonical_url: canonicalUrl,
    title,
    authors,
    published_at: publishedAt,
    updated_at: updatedAt,
    doi: inferredDoi,
    tags,
    author: options.author,
    contributor: options.contributor,
    captured_at: capturedAt
  });
  // The DOI line is only listed for DOI captures that actually resolved a DOI.
  const doiLines = extra.sourceType === "doi" && inferredDoi ? [`- DOI: ${inferredDoi}`] : [];
  const markdown = [
    ...frontmatterLines,
    body.trim(),
    "",
    "## Source",
    "",
    `- URL: ${canonicalUrl}`,
    ...doiLines,
    ""
  ].join("\n");

  return {
    title,
    normalizedUrl: canonicalUrl,
    markdown,
    attachments: prepared.attachments
  };
}
|
|
3571
|
+
/**
 * Captures the article behind a DOI by resolving it through doi.org.
 *
 * @param rootDir Workspace root forwarded to `captureArticleMarkdown`.
 * @param input A bare DOI or doi.org URL.
 * @param options Ingest options forwarded unchanged.
 * @returns Whatever `captureArticleMarkdown` returns for the resolved article.
 * @throws Error when no DOI can be derived from `input`.
 */
async function captureDoiMarkdown(rootDir, input, options) {
  const doi = doiFromInput(input);
  if (doi) {
    const resolverUrl = `https://doi.org/${encodeURIComponent(doi)}`;
    // Keep the caller's original input as the recorded source URL.
    const extra = { sourceType: "doi", sourceUrl: input, doi };
    return captureArticleMarkdown(rootDir, resolverUrl, options, extra);
  }
  throw new Error(`Could not determine a DOI from ${input}`);
}
|
|
3220
3582
|
function manifestMatchesOrigin(manifest, prepared) {
|
|
3221
3583
|
if (prepared.originType === "url") {
|
|
3222
3584
|
return Boolean(prepared.url && manifest.url && normalizeOriginUrl(manifest.url) === normalizeOriginUrl(prepared.url));
|
|
@@ -3231,7 +3593,7 @@ function buildCompositeHash(payloadBytes, attachments = []) {
|
|
|
3231
3593
|
return sha256(`${sha256(payloadBytes)}|${attachmentSignature}`);
|
|
3232
3594
|
}
|
|
3233
3595
|
function sanitizeAssetRelativePath(value) {
|
|
3234
|
-
const normalized =
|
|
3596
|
+
const normalized = path9.posix.normalize(value.replace(/\\/g, "/"));
|
|
3235
3597
|
const segments = normalized.split("/").filter(Boolean).map((segment) => {
|
|
3236
3598
|
if (segment === ".") {
|
|
3237
3599
|
return "";
|
|
@@ -3251,7 +3613,7 @@ function normalizeLocalReference(value) {
|
|
|
3251
3613
|
return null;
|
|
3252
3614
|
}
|
|
3253
3615
|
const lowered = candidate.toLowerCase();
|
|
3254
|
-
if (lowered.startsWith("http://") || lowered.startsWith("https://") || lowered.startsWith("data:") || lowered.startsWith("mailto:") || lowered.startsWith("#") ||
|
|
3616
|
+
if (lowered.startsWith("http://") || lowered.startsWith("https://") || lowered.startsWith("data:") || lowered.startsWith("mailto:") || lowered.startsWith("#") || path9.isAbsolute(candidate)) {
|
|
3255
3617
|
return null;
|
|
3256
3618
|
}
|
|
3257
3619
|
return candidate.replace(/\\/g, "/");
|
|
@@ -3313,12 +3675,12 @@ async function convertHtmlToMarkdown(html, url) {
|
|
|
3313
3675
|
};
|
|
3314
3676
|
}
|
|
3315
3677
|
async function readManifestByHash(manifestsDir, contentHash) {
|
|
3316
|
-
const entries = await
|
|
3678
|
+
const entries = await fs9.readdir(manifestsDir, { withFileTypes: true }).catch(() => []);
|
|
3317
3679
|
for (const entry of entries) {
|
|
3318
3680
|
if (!entry.isFile() || !entry.name.endsWith(".json")) {
|
|
3319
3681
|
continue;
|
|
3320
3682
|
}
|
|
3321
|
-
const manifest = await readJsonFile(
|
|
3683
|
+
const manifest = await readJsonFile(path9.join(manifestsDir, entry.name));
|
|
3322
3684
|
if (manifest?.contentHash === contentHash) {
|
|
3323
3685
|
return manifest;
|
|
3324
3686
|
}
|
|
@@ -3326,12 +3688,12 @@ async function readManifestByHash(manifestsDir, contentHash) {
|
|
|
3326
3688
|
return null;
|
|
3327
3689
|
}
|
|
3328
3690
|
async function readManifestByOrigin(manifestsDir, prepared) {
|
|
3329
|
-
const entries = await
|
|
3691
|
+
const entries = await fs9.readdir(manifestsDir, { withFileTypes: true }).catch(() => []);
|
|
3330
3692
|
for (const entry of entries) {
|
|
3331
3693
|
if (!entry.isFile() || !entry.name.endsWith(".json")) {
|
|
3332
3694
|
continue;
|
|
3333
3695
|
}
|
|
3334
|
-
const manifest = await readJsonFile(
|
|
3696
|
+
const manifest = await readJsonFile(path9.join(manifestsDir, entry.name));
|
|
3335
3697
|
if (manifest && manifestMatchesOrigin(manifest, prepared)) {
|
|
3336
3698
|
return manifest;
|
|
3337
3699
|
}
|
|
@@ -3342,12 +3704,12 @@ async function loadGitignoreMatcher(repoRoot, enabled) {
|
|
|
3342
3704
|
if (!enabled) {
|
|
3343
3705
|
return null;
|
|
3344
3706
|
}
|
|
3345
|
-
const gitignorePath =
|
|
3707
|
+
const gitignorePath = path9.join(repoRoot, ".gitignore");
|
|
3346
3708
|
if (!await fileExists(gitignorePath)) {
|
|
3347
3709
|
return null;
|
|
3348
3710
|
}
|
|
3349
3711
|
const matcher = ignore();
|
|
3350
|
-
matcher.add(await
|
|
3712
|
+
matcher.add(await fs9.readFile(gitignorePath, "utf8"));
|
|
3351
3713
|
return matcher;
|
|
3352
3714
|
}
|
|
3353
3715
|
function builtInIgnoreReason(relativePath) {
|
|
@@ -3368,23 +3730,23 @@ async function collectDirectoryFiles(rootDir, inputDir, repoRoot, options) {
|
|
|
3368
3730
|
if (!currentDir) {
|
|
3369
3731
|
continue;
|
|
3370
3732
|
}
|
|
3371
|
-
const entries = await
|
|
3733
|
+
const entries = await fs9.readdir(currentDir, { withFileTypes: true });
|
|
3372
3734
|
entries.sort((left, right) => left.name.localeCompare(right.name));
|
|
3373
3735
|
for (const entry of entries) {
|
|
3374
|
-
const absolutePath =
|
|
3375
|
-
const relativeToRepo = repoRelativePathFor(absolutePath, repoRoot) ?? toPosix(
|
|
3736
|
+
const absolutePath = path9.join(currentDir, entry.name);
|
|
3737
|
+
const relativeToRepo = repoRelativePathFor(absolutePath, repoRoot) ?? toPosix(path9.relative(inputDir, absolutePath));
|
|
3376
3738
|
const relativePath = relativeToRepo || entry.name;
|
|
3377
3739
|
const builtInReason = builtInIgnoreReason(relativePath);
|
|
3378
3740
|
if (builtInReason) {
|
|
3379
|
-
skipped.push({ path: toPosix(
|
|
3741
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: builtInReason });
|
|
3380
3742
|
continue;
|
|
3381
3743
|
}
|
|
3382
3744
|
if (matcher?.ignores(relativePath)) {
|
|
3383
|
-
skipped.push({ path: toPosix(
|
|
3745
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "gitignore" });
|
|
3384
3746
|
continue;
|
|
3385
3747
|
}
|
|
3386
3748
|
if (matchesAnyGlob(relativePath, options.exclude)) {
|
|
3387
|
-
skipped.push({ path: toPosix(
|
|
3749
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "exclude_glob" });
|
|
3388
3750
|
continue;
|
|
3389
3751
|
}
|
|
3390
3752
|
if (entry.isDirectory()) {
|
|
@@ -3392,21 +3754,21 @@ async function collectDirectoryFiles(rootDir, inputDir, repoRoot, options) {
|
|
|
3392
3754
|
continue;
|
|
3393
3755
|
}
|
|
3394
3756
|
if (!entry.isFile()) {
|
|
3395
|
-
skipped.push({ path: toPosix(
|
|
3757
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "unsupported_entry" });
|
|
3396
3758
|
continue;
|
|
3397
3759
|
}
|
|
3398
3760
|
if (options.include.length > 0 && !matchesAnyGlob(relativePath, options.include)) {
|
|
3399
|
-
skipped.push({ path: toPosix(
|
|
3761
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "include_glob" });
|
|
3400
3762
|
continue;
|
|
3401
3763
|
}
|
|
3402
3764
|
const mimeType = guessMimeType(absolutePath);
|
|
3403
3765
|
const sourceKind = inferKind(mimeType, absolutePath);
|
|
3404
3766
|
if (!supportedDirectoryKind(sourceKind)) {
|
|
3405
|
-
skipped.push({ path: toPosix(
|
|
3767
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
|
|
3406
3768
|
continue;
|
|
3407
3769
|
}
|
|
3408
3770
|
if (files.length >= options.maxFiles) {
|
|
3409
|
-
skipped.push({ path: toPosix(
|
|
3771
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "max_files" });
|
|
3410
3772
|
continue;
|
|
3411
3773
|
}
|
|
3412
3774
|
files.push(absolutePath);
|
|
@@ -3428,12 +3790,12 @@ function resolveUrlMimeType(input, response) {
|
|
|
3428
3790
|
function buildRemoteAssetRelativePath(assetUrl, mimeType) {
|
|
3429
3791
|
const url = new URL(assetUrl);
|
|
3430
3792
|
const normalized = sanitizeAssetRelativePath(`${url.hostname}${url.pathname || "/asset"}`);
|
|
3431
|
-
const extension =
|
|
3432
|
-
const directory =
|
|
3433
|
-
const basename = extension ?
|
|
3793
|
+
const extension = path9.posix.extname(normalized);
|
|
3794
|
+
const directory = path9.posix.dirname(normalized);
|
|
3795
|
+
const basename = extension ? path9.posix.basename(normalized, extension) : path9.posix.basename(normalized);
|
|
3434
3796
|
const resolvedExtension = extension || `.${mime.extension(mimeType) || "bin"}`;
|
|
3435
3797
|
const hashedName = `${basename || "asset"}-${sha256(assetUrl).slice(0, 8)}${resolvedExtension}`;
|
|
3436
|
-
return directory === "." ? hashedName :
|
|
3798
|
+
return directory === "." ? hashedName : path9.posix.join(directory, hashedName);
|
|
3437
3799
|
}
|
|
3438
3800
|
async function readResponseBytesWithinLimit(response, maxBytes) {
|
|
3439
3801
|
const contentLength = Number.parseInt(response.headers.get("content-length") ?? "", 10);
|
|
@@ -3557,9 +3919,10 @@ async function persistPreparedInput(rootDir, prepared, paths) {
|
|
|
3557
3919
|
await ensureDir(paths.extractsDir);
|
|
3558
3920
|
const attachments = prepared.attachments ?? [];
|
|
3559
3921
|
const contentHash = prepared.contentHash ?? buildCompositeHash(prepared.payloadBytes, attachments);
|
|
3922
|
+
const extractionHash = prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact);
|
|
3560
3923
|
const existingByOrigin = await readManifestByOrigin(paths.manifestsDir, prepared);
|
|
3561
3924
|
const existingByHash = existingByOrigin ? null : await readManifestByHash(paths.manifestsDir, contentHash);
|
|
3562
|
-
if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
|
|
3925
|
+
if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.sourceType === prepared.sourceType && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
|
|
3563
3926
|
return { manifest: existingByOrigin, isNew: false, wasUpdated: false };
|
|
3564
3927
|
}
|
|
3565
3928
|
if (existingByHash) {
|
|
@@ -3568,27 +3931,34 @@ async function persistPreparedInput(rootDir, prepared, paths) {
|
|
|
3568
3931
|
const previous = existingByOrigin ?? void 0;
|
|
3569
3932
|
const sourceId = previous?.sourceId ?? `${slugify(prepared.title)}-${contentHash.slice(0, 8)}`;
|
|
3570
3933
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
3571
|
-
const storedPath =
|
|
3572
|
-
const extractedTextPath = prepared.extractedText ?
|
|
3573
|
-
const
|
|
3934
|
+
const storedPath = path9.join(paths.rawSourcesDir, `${sourceId}${prepared.storedExtension}`);
|
|
3935
|
+
const extractedTextPath = prepared.extractedText ? path9.join(paths.extractsDir, `${sourceId}.md`) : void 0;
|
|
3936
|
+
const extractedMetadataPath = prepared.extractionArtifact ? path9.join(paths.extractsDir, `${sourceId}.json`) : void 0;
|
|
3937
|
+
const attachmentsDir = path9.join(paths.rawAssetsDir, sourceId);
|
|
3574
3938
|
if (previous?.storedPath) {
|
|
3575
|
-
await
|
|
3939
|
+
await fs9.rm(path9.resolve(rootDir, previous.storedPath), { force: true });
|
|
3576
3940
|
}
|
|
3577
3941
|
if (previous?.extractedTextPath) {
|
|
3578
|
-
await
|
|
3942
|
+
await fs9.rm(path9.resolve(rootDir, previous.extractedTextPath), { force: true });
|
|
3943
|
+
}
|
|
3944
|
+
if (previous?.extractedMetadataPath) {
|
|
3945
|
+
await fs9.rm(path9.resolve(rootDir, previous.extractedMetadataPath), { force: true });
|
|
3579
3946
|
}
|
|
3580
|
-
await
|
|
3581
|
-
await
|
|
3947
|
+
await fs9.rm(attachmentsDir, { recursive: true, force: true });
|
|
3948
|
+
await fs9.writeFile(storedPath, prepared.payloadBytes);
|
|
3582
3949
|
if (prepared.extractedText && extractedTextPath) {
|
|
3583
|
-
await
|
|
3950
|
+
await fs9.writeFile(extractedTextPath, prepared.extractedText, "utf8");
|
|
3951
|
+
}
|
|
3952
|
+
if (prepared.extractionArtifact && extractedMetadataPath) {
|
|
3953
|
+
await writeJsonFile(extractedMetadataPath, prepared.extractionArtifact);
|
|
3584
3954
|
}
|
|
3585
3955
|
const manifestAttachments = [];
|
|
3586
3956
|
for (const attachment of attachments) {
|
|
3587
|
-
const absoluteAttachmentPath =
|
|
3588
|
-
await ensureDir(
|
|
3589
|
-
await
|
|
3957
|
+
const absoluteAttachmentPath = path9.join(attachmentsDir, attachment.relativePath);
|
|
3958
|
+
await ensureDir(path9.dirname(absoluteAttachmentPath));
|
|
3959
|
+
await fs9.writeFile(absoluteAttachmentPath, attachment.bytes);
|
|
3590
3960
|
manifestAttachments.push({
|
|
3591
|
-
path: toPosix(
|
|
3961
|
+
path: toPosix(path9.relative(rootDir, absoluteAttachmentPath)),
|
|
3592
3962
|
mimeType: attachment.mimeType,
|
|
3593
3963
|
originalPath: attachment.originalPath
|
|
3594
3964
|
});
|
|
@@ -3598,19 +3968,22 @@ async function persistPreparedInput(rootDir, prepared, paths) {
|
|
|
3598
3968
|
title: prepared.title,
|
|
3599
3969
|
originType: prepared.originType,
|
|
3600
3970
|
sourceKind: prepared.sourceKind,
|
|
3971
|
+
sourceType: prepared.sourceType,
|
|
3601
3972
|
language: prepared.language,
|
|
3602
3973
|
originalPath: prepared.originalPath,
|
|
3603
3974
|
repoRelativePath: prepared.repoRelativePath,
|
|
3604
3975
|
url: prepared.url,
|
|
3605
|
-
storedPath: toPosix(
|
|
3606
|
-
extractedTextPath: extractedTextPath ? toPosix(
|
|
3976
|
+
storedPath: toPosix(path9.relative(rootDir, storedPath)),
|
|
3977
|
+
extractedTextPath: extractedTextPath ? toPosix(path9.relative(rootDir, extractedTextPath)) : void 0,
|
|
3978
|
+
extractedMetadataPath: extractedMetadataPath ? toPosix(path9.relative(rootDir, extractedMetadataPath)) : void 0,
|
|
3979
|
+
extractionHash,
|
|
3607
3980
|
mimeType: prepared.mimeType,
|
|
3608
3981
|
contentHash,
|
|
3609
3982
|
createdAt: previous?.createdAt ?? now,
|
|
3610
3983
|
updatedAt: now,
|
|
3611
3984
|
attachments: manifestAttachments.length ? manifestAttachments : void 0
|
|
3612
3985
|
};
|
|
3613
|
-
await writeJsonFile(
|
|
3986
|
+
await writeJsonFile(path9.join(paths.manifestsDir, `${sourceId}.json`), manifest);
|
|
3614
3987
|
await appendLogEntry(rootDir, "ingest", prepared.title, [
|
|
3615
3988
|
`source_id=${sourceId}`,
|
|
3616
3989
|
`kind=${prepared.sourceKind}`,
|
|
@@ -3628,13 +4001,16 @@ async function persistPreparedInput(rootDir, prepared, paths) {
|
|
|
3628
4001
|
return { manifest, isNew: !previous, wasUpdated: Boolean(previous) };
|
|
3629
4002
|
}
|
|
3630
4003
|
async function removeManifestArtifacts(rootDir, manifest, paths) {
|
|
3631
|
-
await
|
|
3632
|
-
await
|
|
4004
|
+
await fs9.rm(path9.join(paths.manifestsDir, `${manifest.sourceId}.json`), { force: true });
|
|
4005
|
+
await fs9.rm(path9.resolve(rootDir, manifest.storedPath), { force: true });
|
|
3633
4006
|
if (manifest.extractedTextPath) {
|
|
3634
|
-
await
|
|
4007
|
+
await fs9.rm(path9.resolve(rootDir, manifest.extractedTextPath), { force: true });
|
|
4008
|
+
}
|
|
4009
|
+
if (manifest.extractedMetadataPath) {
|
|
4010
|
+
await fs9.rm(path9.resolve(rootDir, manifest.extractedMetadataPath), { force: true });
|
|
3635
4011
|
}
|
|
3636
|
-
await
|
|
3637
|
-
await
|
|
4012
|
+
await fs9.rm(path9.join(paths.rawAssetsDir, manifest.sourceId), { recursive: true, force: true });
|
|
4013
|
+
await fs9.rm(path9.join(paths.analysesDir, `${manifest.sourceId}.json`), { force: true });
|
|
3638
4014
|
}
|
|
3639
4015
|
function repoSyncWorkspaceIgnorePaths(rootDir, paths, repoRoot) {
|
|
3640
4016
|
const candidates = [
|
|
@@ -3643,14 +4019,14 @@ function repoSyncWorkspaceIgnorePaths(rootDir, paths, repoRoot) {
|
|
|
3643
4019
|
paths.stateDir,
|
|
3644
4020
|
paths.agentDir,
|
|
3645
4021
|
paths.inboxDir,
|
|
3646
|
-
|
|
3647
|
-
|
|
3648
|
-
|
|
4022
|
+
path9.join(rootDir, ".claude"),
|
|
4023
|
+
path9.join(rootDir, ".cursor"),
|
|
4024
|
+
path9.join(rootDir, ".obsidian")
|
|
3649
4025
|
];
|
|
3650
|
-
return candidates.map((candidate) =>
|
|
4026
|
+
return candidates.map((candidate) => path9.resolve(candidate)).filter((candidate, index, items) => items.indexOf(candidate) === index).filter((candidate) => withinRoot(repoRoot, candidate));
|
|
3651
4027
|
}
|
|
3652
4028
|
function preparedMatchesManifest(manifest, prepared, contentHash) {
|
|
3653
|
-
return manifest.contentHash === contentHash && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
|
|
4029
|
+
return manifest.contentHash === contentHash && manifest.extractionHash === (prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact)) && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.sourceType === prepared.sourceType && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
|
|
3654
4030
|
}
|
|
3655
4031
|
function shouldDeferWatchSemanticRefresh(sourceKind) {
|
|
3656
4032
|
return sourceKind === "markdown" || sourceKind === "text" || sourceKind === "html" || sourceKind === "pdf" || sourceKind === "image";
|
|
@@ -3669,16 +4045,16 @@ async function syncTrackedRepos(rootDir, options, repoRoots) {
|
|
|
3669
4045
|
const normalizedOptions = normalizeIngestOptions(options);
|
|
3670
4046
|
const manifests = await listManifests(rootDir);
|
|
3671
4047
|
const trackedRoots = (repoRoots && repoRoots.length > 0 ? repoRoots : await listTrackedRepoRoots(rootDir)).map(
|
|
3672
|
-
(item) =>
|
|
4048
|
+
(item) => path9.resolve(item)
|
|
3673
4049
|
);
|
|
3674
4050
|
const uniqueRoots = [...new Set(trackedRoots)].sort((left, right) => left.localeCompare(right));
|
|
3675
4051
|
const manifestsByRepoRoot = /* @__PURE__ */ new Map();
|
|
3676
4052
|
for (const manifest of manifests) {
|
|
3677
4053
|
const repoRoot = repoRootFromManifest(manifest);
|
|
3678
|
-
if (!repoRoot || !uniqueRoots.includes(
|
|
4054
|
+
if (!repoRoot || !uniqueRoots.includes(path9.resolve(repoRoot))) {
|
|
3679
4055
|
continue;
|
|
3680
4056
|
}
|
|
3681
|
-
const key =
|
|
4057
|
+
const key = path9.resolve(repoRoot);
|
|
3682
4058
|
const bucket = manifestsByRepoRoot.get(key) ?? [];
|
|
3683
4059
|
bucket.push(manifest);
|
|
3684
4060
|
manifestsByRepoRoot.set(key, bucket);
|
|
@@ -3703,12 +4079,12 @@ async function syncTrackedRepos(rootDir, options, repoRoots) {
|
|
|
3703
4079
|
skipped.push(
|
|
3704
4080
|
...collected.skipped,
|
|
3705
4081
|
...collected.files.filter((absolutePath) => ignoreRoots.some((ignoreRoot) => withinRoot(ignoreRoot, absolutePath))).map((absolutePath) => ({
|
|
3706
|
-
path: toPosix(
|
|
4082
|
+
path: toPosix(path9.relative(rootDir, absolutePath)),
|
|
3707
4083
|
reason: "workspace_generated"
|
|
3708
4084
|
}))
|
|
3709
4085
|
);
|
|
3710
4086
|
scannedCount += files.length;
|
|
3711
|
-
const currentPaths = new Set(files.map((absolutePath) =>
|
|
4087
|
+
const currentPaths = new Set(files.map((absolutePath) => path9.resolve(absolutePath)));
|
|
3712
4088
|
for (const absolutePath of files) {
|
|
3713
4089
|
const prepared = await prepareFileInput(rootDir, absolutePath, repoRoot);
|
|
3714
4090
|
const result = await persistPreparedInput(rootDir, prepared, paths);
|
|
@@ -3719,7 +4095,7 @@ async function syncTrackedRepos(rootDir, options, repoRoots) {
|
|
|
3719
4095
|
}
|
|
3720
4096
|
}
|
|
3721
4097
|
for (const manifest of repoManifests) {
|
|
3722
|
-
const originalPath = manifest.originalPath ?
|
|
4098
|
+
const originalPath = manifest.originalPath ? path9.resolve(manifest.originalPath) : null;
|
|
3723
4099
|
if (originalPath && !currentPaths.has(originalPath)) {
|
|
3724
4100
|
await removeManifestArtifacts(rootDir, manifest, paths);
|
|
3725
4101
|
removed.push(manifest);
|
|
@@ -3727,7 +4103,7 @@ async function syncTrackedRepos(rootDir, options, repoRoots) {
|
|
|
3727
4103
|
}
|
|
3728
4104
|
}
|
|
3729
4105
|
if (uniqueRoots.length > 0) {
|
|
3730
|
-
await appendLogEntry(rootDir, "sync_repo", uniqueRoots.map((repoRoot) => toPosix(
|
|
4106
|
+
await appendLogEntry(rootDir, "sync_repo", uniqueRoots.map((repoRoot) => toPosix(path9.relative(rootDir, repoRoot)) || ".").join(","), [
|
|
3731
4107
|
`repo_roots=${uniqueRoots.length}`,
|
|
3732
4108
|
`scanned=${scannedCount}`,
|
|
3733
4109
|
`imported=${imported.length}`,
|
|
@@ -3750,16 +4126,16 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
|
|
|
3750
4126
|
const normalizedOptions = normalizeIngestOptions(options);
|
|
3751
4127
|
const manifests = await listManifests(rootDir);
|
|
3752
4128
|
const trackedRoots = (repoRoots && repoRoots.length > 0 ? repoRoots : await listTrackedRepoRoots(rootDir)).map(
|
|
3753
|
-
(item) =>
|
|
4129
|
+
(item) => path9.resolve(item)
|
|
3754
4130
|
);
|
|
3755
4131
|
const uniqueRoots = [...new Set(trackedRoots)].sort((left, right) => left.localeCompare(right));
|
|
3756
4132
|
const manifestsByRepoRoot = /* @__PURE__ */ new Map();
|
|
3757
4133
|
for (const manifest of manifests) {
|
|
3758
4134
|
const repoRoot = repoRootFromManifest(manifest);
|
|
3759
|
-
if (!repoRoot || !uniqueRoots.includes(
|
|
4135
|
+
if (!repoRoot || !uniqueRoots.includes(path9.resolve(repoRoot))) {
|
|
3760
4136
|
continue;
|
|
3761
4137
|
}
|
|
3762
|
-
const key =
|
|
4138
|
+
const key = path9.resolve(repoRoot);
|
|
3763
4139
|
const bucket = manifestsByRepoRoot.get(key) ?? [];
|
|
3764
4140
|
bucket.push(manifest);
|
|
3765
4141
|
manifestsByRepoRoot.set(key, bucket);
|
|
@@ -3774,7 +4150,7 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
|
|
|
3774
4150
|
for (const repoRoot of uniqueRoots) {
|
|
3775
4151
|
const repoManifests = manifestsByRepoRoot.get(repoRoot) ?? [];
|
|
3776
4152
|
const manifestsByOriginalPath = new Map(
|
|
3777
|
-
repoManifests.filter((manifest) => manifest.originalPath).map((manifest) => [
|
|
4153
|
+
repoManifests.filter((manifest) => manifest.originalPath).map((manifest) => [path9.resolve(manifest.originalPath), manifest])
|
|
3778
4154
|
);
|
|
3779
4155
|
if (!await fileExists(repoRoot)) {
|
|
3780
4156
|
for (const manifest of repoManifests) {
|
|
@@ -3782,7 +4158,7 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
|
|
|
3782
4158
|
pendingSemanticRefresh.push({
|
|
3783
4159
|
id: pendingSemanticRefreshId("removed", repoRoot, manifest.repoRelativePath ?? manifest.storedPath),
|
|
3784
4160
|
repoRoot,
|
|
3785
|
-
path: toPosix(
|
|
4161
|
+
path: toPosix(path9.relative(rootDir, manifest.originalPath ?? manifest.storedPath)),
|
|
3786
4162
|
changeType: "removed",
|
|
3787
4163
|
detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3788
4164
|
sourceId: manifest.sourceId,
|
|
@@ -3802,16 +4178,16 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
|
|
|
3802
4178
|
skipped.push(
|
|
3803
4179
|
...collected.skipped,
|
|
3804
4180
|
...collected.files.filter((absolutePath) => ignoreRoots.some((ignoreRoot) => withinRoot(ignoreRoot, absolutePath))).map((absolutePath) => ({
|
|
3805
|
-
path: toPosix(
|
|
4181
|
+
path: toPosix(path9.relative(rootDir, absolutePath)),
|
|
3806
4182
|
reason: "workspace_generated"
|
|
3807
4183
|
}))
|
|
3808
4184
|
);
|
|
3809
4185
|
scannedCount += files.length;
|
|
3810
|
-
const currentPaths = new Set(files.map((absolutePath) =>
|
|
4186
|
+
const currentPaths = new Set(files.map((absolutePath) => path9.resolve(absolutePath)));
|
|
3811
4187
|
for (const absolutePath of files) {
|
|
3812
4188
|
const prepared = await prepareFileInput(rootDir, absolutePath, repoRoot);
|
|
3813
4189
|
if (shouldDeferWatchSemanticRefresh(prepared.sourceKind)) {
|
|
3814
|
-
const existing = manifestsByOriginalPath.get(
|
|
4190
|
+
const existing = manifestsByOriginalPath.get(path9.resolve(absolutePath));
|
|
3815
4191
|
const contentHash = buildCompositeHash(prepared.payloadBytes, prepared.attachments);
|
|
3816
4192
|
const changed = !existing || !preparedMatchesManifest(existing, prepared, contentHash);
|
|
3817
4193
|
if (changed) {
|
|
@@ -3819,10 +4195,10 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
|
|
|
3819
4195
|
id: pendingSemanticRefreshId(
|
|
3820
4196
|
existing ? "modified" : "added",
|
|
3821
4197
|
repoRoot,
|
|
3822
|
-
prepared.repoRelativePath ?? toPosix(
|
|
4198
|
+
prepared.repoRelativePath ?? toPosix(path9.relative(repoRoot, absolutePath))
|
|
3823
4199
|
),
|
|
3824
4200
|
repoRoot,
|
|
3825
|
-
path: toPosix(
|
|
4201
|
+
path: toPosix(path9.relative(rootDir, absolutePath)),
|
|
3826
4202
|
changeType: existing ? "modified" : "added",
|
|
3827
4203
|
detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3828
4204
|
sourceId: existing?.sourceId,
|
|
@@ -3842,13 +4218,13 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
|
|
|
3842
4218
|
}
|
|
3843
4219
|
}
|
|
3844
4220
|
for (const manifest of repoManifests) {
|
|
3845
|
-
const originalPath = manifest.originalPath ?
|
|
4221
|
+
const originalPath = manifest.originalPath ? path9.resolve(manifest.originalPath) : null;
|
|
3846
4222
|
if (originalPath && !currentPaths.has(originalPath)) {
|
|
3847
4223
|
if (shouldDeferWatchSemanticRefresh(manifest.sourceKind)) {
|
|
3848
4224
|
pendingSemanticRefresh.push({
|
|
3849
|
-
id: pendingSemanticRefreshId("removed", repoRoot, manifest.repoRelativePath ?? toPosix(
|
|
4225
|
+
id: pendingSemanticRefreshId("removed", repoRoot, manifest.repoRelativePath ?? toPosix(path9.relative(repoRoot, originalPath))),
|
|
3850
4226
|
repoRoot,
|
|
3851
|
-
path: toPosix(
|
|
4227
|
+
path: toPosix(path9.relative(rootDir, originalPath)),
|
|
3852
4228
|
changeType: "removed",
|
|
3853
4229
|
detectedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3854
4230
|
sourceId: manifest.sourceId,
|
|
@@ -3866,7 +4242,7 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
|
|
|
3866
4242
|
await appendLogEntry(
|
|
3867
4243
|
rootDir,
|
|
3868
4244
|
"sync_repo_watch",
|
|
3869
|
-
uniqueRoots.map((repoRoot) => toPosix(
|
|
4245
|
+
uniqueRoots.map((repoRoot) => toPosix(path9.relative(rootDir, repoRoot)) || ".").join(","),
|
|
3870
4246
|
[
|
|
3871
4247
|
`repo_roots=${uniqueRoots.length}`,
|
|
3872
4248
|
`scanned=${scannedCount}`,
|
|
@@ -3891,19 +4267,36 @@ async function syncTrackedReposForWatch(rootDir, options, repoRoots) {
|
|
|
3891
4267
|
staleSourceIds: [...staleSourceIds]
|
|
3892
4268
|
};
|
|
3893
4269
|
}
|
|
3894
|
-
async function prepareFileInput(
|
|
3895
|
-
const payloadBytes = await
|
|
4270
|
+
async function prepareFileInput(rootDir, absoluteInput, repoRoot) {
|
|
4271
|
+
const payloadBytes = await fs9.readFile(absoluteInput);
|
|
3896
4272
|
const mimeType = guessMimeType(absoluteInput);
|
|
3897
4273
|
const sourceKind = inferKind(mimeType, absoluteInput);
|
|
3898
4274
|
const language = inferCodeLanguage(absoluteInput, mimeType);
|
|
3899
|
-
const storedExtension =
|
|
4275
|
+
const storedExtension = path9.extname(absoluteInput) || `.${mime.extension(mimeType) || "bin"}`;
|
|
3900
4276
|
let title;
|
|
3901
4277
|
let extractedText;
|
|
4278
|
+
let extractionArtifact;
|
|
3902
4279
|
if (sourceKind === "markdown" || sourceKind === "text" || sourceKind === "code") {
|
|
3903
4280
|
extractedText = payloadBytes.toString("utf8");
|
|
3904
|
-
title = titleFromText(
|
|
4281
|
+
title = titleFromText(path9.basename(absoluteInput, path9.extname(absoluteInput)), extractedText);
|
|
4282
|
+
extractionArtifact = createPlainTextExtractionArtifact(sourceKind, mimeType);
|
|
4283
|
+
} else if (sourceKind === "pdf") {
|
|
4284
|
+
title = path9.basename(absoluteInput, path9.extname(absoluteInput));
|
|
4285
|
+
const extracted = await extractPdfText({ mimeType, bytes: payloadBytes });
|
|
4286
|
+
extractedText = extracted.extractedText;
|
|
4287
|
+
extractionArtifact = extracted.artifact;
|
|
4288
|
+
} else if (sourceKind === "image") {
|
|
4289
|
+
title = path9.basename(absoluteInput, path9.extname(absoluteInput));
|
|
4290
|
+
const extracted = await extractImageWithVision(rootDir, {
|
|
4291
|
+
title,
|
|
4292
|
+
mimeType,
|
|
4293
|
+
filePath: absoluteInput
|
|
4294
|
+
});
|
|
4295
|
+
title = extracted.title?.trim() || title;
|
|
4296
|
+
extractedText = extracted.extractedText;
|
|
4297
|
+
extractionArtifact = extracted.artifact;
|
|
3905
4298
|
} else {
|
|
3906
|
-
title =
|
|
4299
|
+
title = path9.basename(absoluteInput, path9.extname(absoluteInput));
|
|
3907
4300
|
}
|
|
3908
4301
|
return {
|
|
3909
4302
|
title,
|
|
@@ -3915,15 +4308,18 @@ async function prepareFileInput(_rootDir, absoluteInput, repoRoot) {
|
|
|
3915
4308
|
mimeType,
|
|
3916
4309
|
storedExtension,
|
|
3917
4310
|
payloadBytes,
|
|
3918
|
-
extractedText
|
|
4311
|
+
extractedText,
|
|
4312
|
+
extractionArtifact,
|
|
4313
|
+
extractionHash: buildExtractionHash(extractedText, extractionArtifact)
|
|
3919
4314
|
};
|
|
3920
4315
|
}
|
|
3921
|
-
async function prepareUrlInput(input, options) {
|
|
4316
|
+
async function prepareUrlInput(rootDir, input, options) {
|
|
3922
4317
|
const response = await fetch(input);
|
|
3923
4318
|
if (!response.ok) {
|
|
3924
4319
|
throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
|
|
3925
4320
|
}
|
|
3926
|
-
const
|
|
4321
|
+
const finalUrl = normalizeOriginUrl(response.url || input);
|
|
4322
|
+
const inputUrl = new URL(finalUrl);
|
|
3927
4323
|
const originalPayloadBytes = Buffer.from(await response.arrayBuffer());
|
|
3928
4324
|
let payloadBytes = originalPayloadBytes;
|
|
3929
4325
|
let mimeType = resolveUrlMimeType(input, response);
|
|
@@ -3932,18 +4328,19 @@ async function prepareUrlInput(input, options) {
|
|
|
3932
4328
|
let storedExtension = ".bin";
|
|
3933
4329
|
let title = inputUrl.hostname + inputUrl.pathname;
|
|
3934
4330
|
let extractedText;
|
|
4331
|
+
let extractionArtifact;
|
|
3935
4332
|
let attachments;
|
|
3936
4333
|
let contentHash;
|
|
3937
4334
|
const logDetails = [];
|
|
3938
4335
|
if (sourceKind === "html" || mimeType.startsWith("text/html")) {
|
|
3939
4336
|
const html = originalPayloadBytes.toString("utf8");
|
|
3940
|
-
const initialConversion = await convertHtmlToMarkdown(html,
|
|
4337
|
+
const initialConversion = await convertHtmlToMarkdown(html, finalUrl);
|
|
3941
4338
|
title = initialConversion.title;
|
|
3942
4339
|
let localizedHtml = html;
|
|
3943
4340
|
let localAssetReplacements;
|
|
3944
4341
|
if (options.includeAssets) {
|
|
3945
4342
|
const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
|
|
3946
|
-
extractHtmlImageReferences(html,
|
|
4343
|
+
extractHtmlImageReferences(html, finalUrl),
|
|
3947
4344
|
options
|
|
3948
4345
|
);
|
|
3949
4346
|
if (remoteAttachments.length) {
|
|
@@ -3953,18 +4350,19 @@ async function prepareUrlInput(input, options) {
|
|
|
3953
4350
|
localAssetReplacements = new Map(
|
|
3954
4351
|
remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
|
|
3955
4352
|
);
|
|
3956
|
-
localizedHtml = rewriteHtmlImageReferences(html,
|
|
4353
|
+
localizedHtml = rewriteHtmlImageReferences(html, finalUrl, localAssetReplacements);
|
|
3957
4354
|
logDetails.push(`remote_assets=${remoteAttachments.length}`);
|
|
3958
4355
|
}
|
|
3959
4356
|
if (skippedCount) {
|
|
3960
4357
|
logDetails.push(`remote_asset_skips=${skippedCount}`);
|
|
3961
4358
|
}
|
|
3962
4359
|
}
|
|
3963
|
-
const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml,
|
|
4360
|
+
const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml, finalUrl);
|
|
3964
4361
|
extractedText = converted.markdown;
|
|
4362
|
+
extractionArtifact = createHtmlReadabilityExtractionArtifact("markdown", "text/markdown");
|
|
3965
4363
|
if (localAssetReplacements?.size) {
|
|
3966
4364
|
const absoluteLocalAssetReplacements = new Map(
|
|
3967
|
-
[...localAssetReplacements.values()].map((replacement) => [new URL(replacement,
|
|
4365
|
+
[...localAssetReplacements.values()].map((replacement) => [new URL(replacement, finalUrl).toString(), replacement])
|
|
3968
4366
|
);
|
|
3969
4367
|
extractedText = rewriteMarkdownImageTargets(extractedText, absoluteLocalAssetReplacements);
|
|
3970
4368
|
}
|
|
@@ -3973,14 +4371,15 @@ async function prepareUrlInput(input, options) {
|
|
|
3973
4371
|
sourceKind = "markdown";
|
|
3974
4372
|
storedExtension = ".md";
|
|
3975
4373
|
} else {
|
|
3976
|
-
const extension =
|
|
4374
|
+
const extension = path9.extname(inputUrl.pathname);
|
|
3977
4375
|
storedExtension = extension || `.${mime.extension(mimeType) || "bin"}`;
|
|
3978
4376
|
if (sourceKind === "markdown" || sourceKind === "text" || sourceKind === "code") {
|
|
3979
4377
|
extractedText = payloadBytes.toString("utf8");
|
|
3980
4378
|
title = titleFromText(title || inputUrl.hostname, extractedText);
|
|
4379
|
+
extractionArtifact = createPlainTextExtractionArtifact(sourceKind, mimeType);
|
|
3981
4380
|
if (sourceKind === "markdown" && options.includeAssets) {
|
|
3982
4381
|
const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
|
|
3983
|
-
extractMarkdownImageReferences(extractedText,
|
|
4382
|
+
extractMarkdownImageReferences(extractedText, finalUrl),
|
|
3984
4383
|
options
|
|
3985
4384
|
);
|
|
3986
4385
|
if (remoteAttachments.length) {
|
|
@@ -3990,7 +4389,7 @@ async function prepareUrlInput(input, options) {
|
|
|
3990
4389
|
const replacements = new Map(
|
|
3991
4390
|
remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
|
|
3992
4391
|
);
|
|
3993
|
-
extractedText = rewriteMarkdownImageReferences(extractedText,
|
|
4392
|
+
extractedText = rewriteMarkdownImageReferences(extractedText, finalUrl, replacements);
|
|
3994
4393
|
payloadBytes = Buffer.from(extractedText, "utf8");
|
|
3995
4394
|
logDetails.push(`remote_assets=${remoteAttachments.length}`);
|
|
3996
4395
|
}
|
|
@@ -3998,6 +4397,19 @@ async function prepareUrlInput(input, options) {
|
|
|
3998
4397
|
logDetails.push(`remote_asset_skips=${skippedCount}`);
|
|
3999
4398
|
}
|
|
4000
4399
|
}
|
|
4400
|
+
} else if (sourceKind === "pdf") {
|
|
4401
|
+
const extracted = await extractPdfText({ mimeType, bytes: payloadBytes });
|
|
4402
|
+
extractedText = extracted.extractedText;
|
|
4403
|
+
extractionArtifact = extracted.artifact;
|
|
4404
|
+
} else if (sourceKind === "image") {
|
|
4405
|
+
const extracted = await extractImageWithVision(rootDir, {
|
|
4406
|
+
title,
|
|
4407
|
+
mimeType,
|
|
4408
|
+
bytes: payloadBytes
|
|
4409
|
+
});
|
|
4410
|
+
title = extracted.title?.trim() || title;
|
|
4411
|
+
extractedText = extracted.extractedText;
|
|
4412
|
+
extractionArtifact = extracted.artifact;
|
|
4001
4413
|
}
|
|
4002
4414
|
}
|
|
4003
4415
|
return {
|
|
@@ -4005,11 +4417,13 @@ async function prepareUrlInput(input, options) {
|
|
|
4005
4417
|
originType: "url",
|
|
4006
4418
|
sourceKind,
|
|
4007
4419
|
language,
|
|
4008
|
-
url:
|
|
4420
|
+
url: finalUrl,
|
|
4009
4421
|
mimeType,
|
|
4010
4422
|
storedExtension,
|
|
4011
4423
|
payloadBytes,
|
|
4012
4424
|
extractedText,
|
|
4425
|
+
extractionArtifact,
|
|
4426
|
+
extractionHash: buildExtractionHash(extractedText, extractionArtifact),
|
|
4013
4427
|
attachments,
|
|
4014
4428
|
contentHash,
|
|
4015
4429
|
logDetails
|
|
@@ -4023,14 +4437,14 @@ async function collectInboxAttachmentRefs(inputDir, files) {
|
|
|
4023
4437
|
if (sourceKind !== "markdown") {
|
|
4024
4438
|
continue;
|
|
4025
4439
|
}
|
|
4026
|
-
const content = await
|
|
4440
|
+
const content = await fs9.readFile(absolutePath, "utf8");
|
|
4027
4441
|
const refs = extractMarkdownReferences(content);
|
|
4028
4442
|
if (!refs.length) {
|
|
4029
4443
|
continue;
|
|
4030
4444
|
}
|
|
4031
4445
|
const sourceRefs = [];
|
|
4032
4446
|
for (const ref of refs) {
|
|
4033
|
-
const resolved =
|
|
4447
|
+
const resolved = path9.resolve(path9.dirname(absolutePath), ref);
|
|
4034
4448
|
if (!resolved.startsWith(inputDir) || !await fileExists(resolved)) {
|
|
4035
4449
|
continue;
|
|
4036
4450
|
}
|
|
@@ -4064,12 +4478,12 @@ function rewriteMarkdownReferences(content, replacements) {
|
|
|
4064
4478
|
});
|
|
4065
4479
|
}
|
|
4066
4480
|
async function prepareInboxMarkdownInput(absolutePath, attachmentRefs) {
|
|
4067
|
-
const originalBytes = await
|
|
4481
|
+
const originalBytes = await fs9.readFile(absolutePath);
|
|
4068
4482
|
const originalText = originalBytes.toString("utf8");
|
|
4069
|
-
const title = titleFromText(
|
|
4483
|
+
const title = titleFromText(path9.basename(absolutePath, path9.extname(absolutePath)), originalText);
|
|
4070
4484
|
const attachments = [];
|
|
4071
4485
|
for (const attachmentRef of attachmentRefs) {
|
|
4072
|
-
const bytes = await
|
|
4486
|
+
const bytes = await fs9.readFile(attachmentRef.absolutePath);
|
|
4073
4487
|
attachments.push({
|
|
4074
4488
|
relativePath: sanitizeAssetRelativePath(attachmentRef.relativeRef),
|
|
4075
4489
|
mimeType: guessMimeType(attachmentRef.absolutePath),
|
|
@@ -4086,15 +4500,18 @@ async function prepareInboxMarkdownInput(absolutePath, attachmentRefs) {
|
|
|
4086
4500
|
])
|
|
4087
4501
|
);
|
|
4088
4502
|
const rewrittenText = rewriteMarkdownReferences(originalText, replacements);
|
|
4503
|
+
const extractionArtifact = createPlainTextExtractionArtifact("markdown", "text/markdown");
|
|
4089
4504
|
return {
|
|
4090
4505
|
title,
|
|
4091
4506
|
originType: "file",
|
|
4092
4507
|
sourceKind: "markdown",
|
|
4093
4508
|
originalPath: toPosix(absolutePath),
|
|
4094
4509
|
mimeType: "text/markdown",
|
|
4095
|
-
storedExtension:
|
|
4510
|
+
storedExtension: path9.extname(absolutePath) || ".md",
|
|
4096
4511
|
payloadBytes: Buffer.from(rewrittenText, "utf8"),
|
|
4097
4512
|
extractedText: rewrittenText,
|
|
4513
|
+
extractionArtifact,
|
|
4514
|
+
extractionHash: buildExtractionHash(rewrittenText, extractionArtifact),
|
|
4098
4515
|
attachments,
|
|
4099
4516
|
contentHash
|
|
4100
4517
|
};
|
|
@@ -4105,16 +4522,16 @@ function isSupportedInboxKind(sourceKind) {
|
|
|
4105
4522
|
async function ingestInput(rootDir, input, options) {
|
|
4106
4523
|
const { paths } = await initWorkspace(rootDir);
|
|
4107
4524
|
const normalizedOptions = normalizeIngestOptions(options);
|
|
4108
|
-
const absoluteInput =
|
|
4109
|
-
const repoRoot = isHttpUrl(input) || normalizedOptions.repoRoot ? normalizedOptions.repoRoot : await findNearestGitRoot2(absoluteInput).then((value) => value ??
|
|
4110
|
-
const prepared = isHttpUrl(input) ? await prepareUrlInput(input, normalizedOptions) : await prepareFileInput(rootDir, absoluteInput, repoRoot);
|
|
4525
|
+
const absoluteInput = path9.resolve(rootDir, input);
|
|
4526
|
+
const repoRoot = isHttpUrl(input) || normalizedOptions.repoRoot ? normalizedOptions.repoRoot : await findNearestGitRoot2(absoluteInput).then((value) => value ?? path9.dirname(absoluteInput));
|
|
4527
|
+
const prepared = isHttpUrl(input) ? await prepareUrlInput(rootDir, input, normalizedOptions) : await prepareFileInput(rootDir, absoluteInput, repoRoot);
|
|
4111
4528
|
const result = await persistPreparedInput(rootDir, prepared, paths);
|
|
4112
4529
|
return result.manifest;
|
|
4113
4530
|
}
|
|
4114
4531
|
async function addInput(rootDir, input, options = {}) {
|
|
4115
4532
|
const { paths } = await initWorkspace(rootDir);
|
|
4116
|
-
if (!isHttpUrl(input) && !arxivIdFromInput(input)) {
|
|
4117
|
-
throw new Error("`swarmvault add` only supports URLs
|
|
4533
|
+
if (!isHttpUrl(input) && !arxivIdFromInput(input) && !doiFromInput(input)) {
|
|
4534
|
+
throw new Error("`swarmvault add` only supports URLs, bare arXiv ids, and bare DOI strings in the current release.");
|
|
4118
4535
|
}
|
|
4119
4536
|
let prepared = null;
|
|
4120
4537
|
let captureType = "url";
|
|
@@ -4127,26 +4544,55 @@ async function addInput(rootDir, input, options = {}) {
|
|
|
4127
4544
|
title: captured.title,
|
|
4128
4545
|
url: captured.normalizedUrl,
|
|
4129
4546
|
markdown: captured.markdown,
|
|
4547
|
+
sourceType: "arxiv",
|
|
4130
4548
|
logDetails: ["capture_type=arxiv"]
|
|
4131
4549
|
});
|
|
4132
4550
|
captureType = "arxiv";
|
|
4133
4551
|
normalizedUrl = captured.normalizedUrl;
|
|
4552
|
+
} else if (doiFromInput(input)) {
|
|
4553
|
+
const captured = await captureDoiMarkdown(rootDir, input, options);
|
|
4554
|
+
prepared = prepareCapturedMarkdownInput({
|
|
4555
|
+
title: captured.title,
|
|
4556
|
+
url: captured.normalizedUrl,
|
|
4557
|
+
markdown: captured.markdown,
|
|
4558
|
+
sourceType: "doi",
|
|
4559
|
+
attachments: captured.attachments,
|
|
4560
|
+
logDetails: ["capture_type=doi"]
|
|
4561
|
+
});
|
|
4562
|
+
captureType = "doi";
|
|
4563
|
+
normalizedUrl = captured.normalizedUrl;
|
|
4134
4564
|
} else if (isTweetUrl(input)) {
|
|
4135
4565
|
const captured = await captureTweetMarkdown(input, options);
|
|
4136
4566
|
prepared = prepareCapturedMarkdownInput({
|
|
4137
4567
|
title: captured.title,
|
|
4138
4568
|
url: captured.normalizedUrl,
|
|
4139
4569
|
markdown: captured.markdown,
|
|
4570
|
+
sourceType: "tweet",
|
|
4140
4571
|
logDetails: ["capture_type=tweet"]
|
|
4141
4572
|
});
|
|
4142
4573
|
captureType = "tweet";
|
|
4143
4574
|
normalizedUrl = captured.normalizedUrl;
|
|
4575
|
+
} else if (isHttpUrl(input)) {
|
|
4576
|
+
const captured = await captureArticleMarkdown(rootDir, input, options, {
|
|
4577
|
+
sourceType: "article",
|
|
4578
|
+
sourceUrl: input
|
|
4579
|
+
});
|
|
4580
|
+
prepared = prepareCapturedMarkdownInput({
|
|
4581
|
+
title: captured.title,
|
|
4582
|
+
url: captured.normalizedUrl,
|
|
4583
|
+
markdown: captured.markdown,
|
|
4584
|
+
sourceType: "article",
|
|
4585
|
+
attachments: captured.attachments,
|
|
4586
|
+
logDetails: ["capture_type=article"]
|
|
4587
|
+
});
|
|
4588
|
+
captureType = "article";
|
|
4589
|
+
normalizedUrl = captured.normalizedUrl;
|
|
4144
4590
|
}
|
|
4145
4591
|
} catch {
|
|
4146
4592
|
fallback = true;
|
|
4147
4593
|
}
|
|
4148
4594
|
if (!prepared) {
|
|
4149
|
-
normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : normalizeOriginUrl(input);
|
|
4595
|
+
normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : doiFromInput(input) ? `https://doi.org/${encodeURIComponent(doiFromInput(input) ?? "")}` : normalizeOriginUrl(input);
|
|
4150
4596
|
return {
|
|
4151
4597
|
captureType: "url",
|
|
4152
4598
|
manifest: await ingestInput(rootDir, normalizedUrl, options),
|
|
@@ -4167,7 +4613,7 @@ async function addInput(rootDir, input, options = {}) {
|
|
|
4167
4613
|
async function ingestDirectory(rootDir, inputDir, options) {
|
|
4168
4614
|
const { paths } = await initWorkspace(rootDir);
|
|
4169
4615
|
const normalizedOptions = normalizeIngestOptions(options);
|
|
4170
|
-
const absoluteInputDir =
|
|
4616
|
+
const absoluteInputDir = path9.resolve(rootDir, inputDir);
|
|
4171
4617
|
const repoRoot = normalizedOptions.repoRoot ?? await findNearestGitRoot2(absoluteInputDir) ?? absoluteInputDir;
|
|
4172
4618
|
if (!await fileExists(absoluteInputDir)) {
|
|
4173
4619
|
throw new Error(`Directory not found: ${absoluteInputDir}`);
|
|
@@ -4183,11 +4629,11 @@ async function ingestDirectory(rootDir, inputDir, options) {
|
|
|
4183
4629
|
} else if (result.wasUpdated) {
|
|
4184
4630
|
updated.push(result.manifest);
|
|
4185
4631
|
} else {
|
|
4186
|
-
skipped.push({ path: toPosix(
|
|
4632
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "duplicate_content" });
|
|
4187
4633
|
}
|
|
4188
4634
|
}
|
|
4189
|
-
await appendLogEntry(rootDir, "ingest_directory", toPosix(
|
|
4190
|
-
`repo_root=${toPosix(
|
|
4635
|
+
await appendLogEntry(rootDir, "ingest_directory", toPosix(path9.relative(rootDir, absoluteInputDir)) || ".", [
|
|
4636
|
+
`repo_root=${toPosix(path9.relative(rootDir, repoRoot)) || "."}`,
|
|
4191
4637
|
`scanned=${files.length}`,
|
|
4192
4638
|
`imported=${imported.length}`,
|
|
4193
4639
|
`updated=${updated.length}`,
|
|
@@ -4204,7 +4650,7 @@ async function ingestDirectory(rootDir, inputDir, options) {
|
|
|
4204
4650
|
}
|
|
4205
4651
|
async function importInbox(rootDir, inputDir) {
|
|
4206
4652
|
const { paths } = await initWorkspace(rootDir);
|
|
4207
|
-
const effectiveInputDir =
|
|
4653
|
+
const effectiveInputDir = path9.resolve(rootDir, inputDir ?? paths.inboxDir);
|
|
4208
4654
|
if (!await fileExists(effectiveInputDir)) {
|
|
4209
4655
|
throw new Error(`Inbox directory not found: ${effectiveInputDir}`);
|
|
4210
4656
|
}
|
|
@@ -4215,31 +4661,31 @@ async function importInbox(rootDir, inputDir) {
|
|
|
4215
4661
|
const skipped = [];
|
|
4216
4662
|
let attachmentCount = 0;
|
|
4217
4663
|
for (const absolutePath of files) {
|
|
4218
|
-
const basename =
|
|
4664
|
+
const basename = path9.basename(absolutePath);
|
|
4219
4665
|
if (basename.startsWith(".")) {
|
|
4220
|
-
skipped.push({ path: toPosix(
|
|
4666
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "hidden_file" });
|
|
4221
4667
|
continue;
|
|
4222
4668
|
}
|
|
4223
4669
|
if (claimedAttachments.has(absolutePath)) {
|
|
4224
|
-
skipped.push({ path: toPosix(
|
|
4670
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "referenced_attachment" });
|
|
4225
4671
|
continue;
|
|
4226
4672
|
}
|
|
4227
4673
|
const mimeType = guessMimeType(absolutePath);
|
|
4228
4674
|
const sourceKind = inferKind(mimeType, absolutePath);
|
|
4229
4675
|
if (!isSupportedInboxKind(sourceKind)) {
|
|
4230
|
-
skipped.push({ path: toPosix(
|
|
4676
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: `unsupported_kind:${sourceKind}` });
|
|
4231
4677
|
continue;
|
|
4232
4678
|
}
|
|
4233
4679
|
const prepared = sourceKind === "markdown" && refsBySource.has(absolutePath) ? await prepareInboxMarkdownInput(absolutePath, refsBySource.get(absolutePath) ?? []) : await prepareFileInput(rootDir, absolutePath);
|
|
4234
4680
|
const result = await persistPreparedInput(rootDir, prepared, paths);
|
|
4235
4681
|
if (!result.isNew) {
|
|
4236
|
-
skipped.push({ path: toPosix(
|
|
4682
|
+
skipped.push({ path: toPosix(path9.relative(rootDir, absolutePath)), reason: "duplicate_content" });
|
|
4237
4683
|
continue;
|
|
4238
4684
|
}
|
|
4239
4685
|
attachmentCount += result.manifest.attachments?.length ?? 0;
|
|
4240
4686
|
imported.push(result.manifest);
|
|
4241
4687
|
}
|
|
4242
|
-
await appendLogEntry(rootDir, "inbox_import", toPosix(
|
|
4688
|
+
await appendLogEntry(rootDir, "inbox_import", toPosix(path9.relative(rootDir, effectiveInputDir)) || ".", [
|
|
4243
4689
|
`scanned=${files.length}`,
|
|
4244
4690
|
`imported=${imported.length}`,
|
|
4245
4691
|
`attachments=${attachmentCount}`,
|
|
@@ -4258,9 +4704,9 @@ async function listManifests(rootDir) {
|
|
|
4258
4704
|
if (!await fileExists(paths.manifestsDir)) {
|
|
4259
4705
|
return [];
|
|
4260
4706
|
}
|
|
4261
|
-
const entries = await
|
|
4707
|
+
const entries = await fs9.readdir(paths.manifestsDir);
|
|
4262
4708
|
const manifests = await Promise.all(
|
|
4263
|
-
entries.filter((entry) => entry.endsWith(".json")).map((entry) => readJsonFile(
|
|
4709
|
+
entries.filter((entry) => entry.endsWith(".json")).map((entry) => readJsonFile(path9.join(paths.manifestsDir, entry)))
|
|
4264
4710
|
);
|
|
4265
4711
|
return manifests.filter((manifest) => Boolean(manifest));
|
|
4266
4712
|
}
|
|
@@ -4268,28 +4714,38 @@ async function readExtractedText(rootDir, manifest) {
|
|
|
4268
4714
|
if (!manifest.extractedTextPath) {
|
|
4269
4715
|
return void 0;
|
|
4270
4716
|
}
|
|
4271
|
-
const absolutePath =
|
|
4717
|
+
const absolutePath = path9.resolve(rootDir, manifest.extractedTextPath);
|
|
4718
|
+
if (!await fileExists(absolutePath)) {
|
|
4719
|
+
return void 0;
|
|
4720
|
+
}
|
|
4721
|
+
return fs9.readFile(absolutePath, "utf8");
|
|
4722
|
+
}
|
|
4723
|
+
async function readExtractionArtifact(rootDir, manifest) {
|
|
4724
|
+
if (!manifest.extractedMetadataPath) {
|
|
4725
|
+
return void 0;
|
|
4726
|
+
}
|
|
4727
|
+
const absolutePath = path9.resolve(rootDir, manifest.extractedMetadataPath);
|
|
4272
4728
|
if (!await fileExists(absolutePath)) {
|
|
4273
4729
|
return void 0;
|
|
4274
4730
|
}
|
|
4275
|
-
return
|
|
4731
|
+
return await readJsonFile(absolutePath) ?? void 0;
|
|
4276
4732
|
}
|
|
4277
4733
|
|
|
4278
4734
|
// src/mcp.ts
|
|
4279
|
-
import
|
|
4280
|
-
import
|
|
4735
|
+
import fs16 from "fs/promises";
|
|
4736
|
+
import path19 from "path";
|
|
4281
4737
|
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
4282
4738
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4283
|
-
import { z as
|
|
4739
|
+
import { z as z8 } from "zod";
|
|
4284
4740
|
|
|
4285
4741
|
// src/schema.ts
|
|
4286
|
-
import
|
|
4287
|
-
import
|
|
4742
|
+
import fs10 from "fs/promises";
|
|
4743
|
+
import path10 from "path";
|
|
4288
4744
|
function normalizeSchemaContent(content) {
|
|
4289
4745
|
return content.trim() ? content.trim() : defaultVaultSchema().trim();
|
|
4290
4746
|
}
|
|
4291
4747
|
async function readSchemaFile(schemaPath, fallback = defaultVaultSchema()) {
|
|
4292
|
-
const content = await fileExists(schemaPath) ? await
|
|
4748
|
+
const content = await fileExists(schemaPath) ? await fs10.readFile(schemaPath, "utf8") : fallback;
|
|
4293
4749
|
const normalized = normalizeSchemaContent(content);
|
|
4294
4750
|
return {
|
|
4295
4751
|
path: schemaPath,
|
|
@@ -4298,7 +4754,7 @@ async function readSchemaFile(schemaPath, fallback = defaultVaultSchema()) {
|
|
|
4298
4754
|
};
|
|
4299
4755
|
}
|
|
4300
4756
|
function resolveProjectSchemaPath(rootDir, schemaPath) {
|
|
4301
|
-
return
|
|
4757
|
+
return path10.resolve(rootDir, schemaPath);
|
|
4302
4758
|
}
|
|
4303
4759
|
function composeVaultSchema(root, projectSchemas = []) {
|
|
4304
4760
|
if (!projectSchemas.length) {
|
|
@@ -4314,7 +4770,7 @@ function composeVaultSchema(root, projectSchemas = []) {
|
|
|
4314
4770
|
(schema) => [
|
|
4315
4771
|
`## Project Schema`,
|
|
4316
4772
|
"",
|
|
4317
|
-
`Path: ${toPosix(
|
|
4773
|
+
`Path: ${toPosix(path10.relative(path10.dirname(root.path), schema.path) || schema.path)}`,
|
|
4318
4774
|
"",
|
|
4319
4775
|
schema.content
|
|
4320
4776
|
].join("\n")
|
|
@@ -4390,30 +4846,30 @@ function buildSchemaPrompt(schema, instruction) {
|
|
|
4390
4846
|
}
|
|
4391
4847
|
|
|
4392
4848
|
// src/vault.ts
|
|
4393
|
-
import
|
|
4394
|
-
import
|
|
4395
|
-
import
|
|
4396
|
-
import { z as
|
|
4849
|
+
import fs15 from "fs/promises";
|
|
4850
|
+
import path18 from "path";
|
|
4851
|
+
import matter9 from "gray-matter";
|
|
4852
|
+
import { z as z7 } from "zod";
|
|
4397
4853
|
|
|
4398
4854
|
// src/analysis.ts
|
|
4399
|
-
import
|
|
4400
|
-
import { z } from "zod";
|
|
4401
|
-
var ANALYSIS_FORMAT_VERSION =
|
|
4402
|
-
var sourceAnalysisSchema =
|
|
4403
|
-
title:
|
|
4404
|
-
summary:
|
|
4405
|
-
concepts:
|
|
4406
|
-
entities:
|
|
4407
|
-
claims:
|
|
4408
|
-
|
|
4409
|
-
text:
|
|
4410
|
-
confidence:
|
|
4411
|
-
status:
|
|
4412
|
-
polarity:
|
|
4413
|
-
citation:
|
|
4855
|
+
import path11 from "path";
|
|
4856
|
+
import { z as z2 } from "zod";
|
|
4857
|
+
var ANALYSIS_FORMAT_VERSION = 5;
|
|
4858
|
+
var sourceAnalysisSchema = z2.object({
|
|
4859
|
+
title: z2.string().min(1),
|
|
4860
|
+
summary: z2.string().min(1),
|
|
4861
|
+
concepts: z2.array(z2.object({ name: z2.string().min(1), description: z2.string().default("") })).max(12).default([]),
|
|
4862
|
+
entities: z2.array(z2.object({ name: z2.string().min(1), description: z2.string().default("") })).max(12).default([]),
|
|
4863
|
+
claims: z2.array(
|
|
4864
|
+
z2.object({
|
|
4865
|
+
text: z2.string().min(1),
|
|
4866
|
+
confidence: z2.number().min(0).max(1).default(0.6),
|
|
4867
|
+
status: z2.enum(["extracted", "inferred", "conflicted", "stale"]).default("extracted"),
|
|
4868
|
+
polarity: z2.enum(["positive", "negative", "neutral"]).default("neutral"),
|
|
4869
|
+
citation: z2.string().min(1)
|
|
4414
4870
|
})
|
|
4415
4871
|
).max(8).default([]),
|
|
4416
|
-
questions:
|
|
4872
|
+
questions: z2.array(z2.string()).max(6).default([])
|
|
4417
4873
|
});
|
|
4418
4874
|
var STOPWORDS = /* @__PURE__ */ new Set([
|
|
4419
4875
|
"about",
|
|
@@ -4502,6 +4958,7 @@ function heuristicAnalysis(manifest, text, schemaHash) {
|
|
|
4502
4958
|
analysisVersion: ANALYSIS_FORMAT_VERSION,
|
|
4503
4959
|
sourceId: manifest.sourceId,
|
|
4504
4960
|
sourceHash: manifest.contentHash,
|
|
4961
|
+
extractionHash: manifest.extractionHash,
|
|
4505
4962
|
schemaHash,
|
|
4506
4963
|
title: deriveTitle(manifest, text),
|
|
4507
4964
|
summary: firstSentences(normalized, 3) || truncate(normalized, 280) || `Imported ${manifest.sourceKind} source.`,
|
|
@@ -4548,6 +5005,7 @@ ${truncate(text, 18e3)}`
|
|
|
4548
5005
|
analysisVersion: ANALYSIS_FORMAT_VERSION,
|
|
4549
5006
|
sourceId: manifest.sourceId,
|
|
4550
5007
|
sourceHash: manifest.contentHash,
|
|
5008
|
+
extractionHash: manifest.extractionHash,
|
|
4551
5009
|
schemaHash: schema.hash,
|
|
4552
5010
|
title: parsed.title,
|
|
4553
5011
|
summary: parsed.summary,
|
|
@@ -4574,24 +5032,97 @@ ${truncate(text, 18e3)}`
|
|
|
4574
5032
|
producedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
4575
5033
|
};
|
|
4576
5034
|
}
|
|
5035
|
+
function analysisFromVisionExtraction(manifest, extraction, schemaHash) {
|
|
5036
|
+
if (!extraction.vision) {
|
|
5037
|
+
return null;
|
|
5038
|
+
}
|
|
5039
|
+
return {
|
|
5040
|
+
analysisVersion: ANALYSIS_FORMAT_VERSION,
|
|
5041
|
+
sourceId: manifest.sourceId,
|
|
5042
|
+
sourceHash: manifest.contentHash,
|
|
5043
|
+
extractionHash: manifest.extractionHash,
|
|
5044
|
+
schemaHash,
|
|
5045
|
+
title: extraction.vision.title?.trim() || manifest.title,
|
|
5046
|
+
summary: extraction.vision.summary,
|
|
5047
|
+
concepts: extraction.vision.concepts.map((term) => ({
|
|
5048
|
+
id: `concept:${slugify(term.name)}`,
|
|
5049
|
+
name: term.name,
|
|
5050
|
+
description: term.description
|
|
5051
|
+
})),
|
|
5052
|
+
entities: extraction.vision.entities.map((term) => ({
|
|
5053
|
+
id: `entity:${slugify(term.name)}`,
|
|
5054
|
+
name: term.name,
|
|
5055
|
+
description: term.description
|
|
5056
|
+
})),
|
|
5057
|
+
claims: extraction.vision.claims.map((claim, index) => ({
|
|
5058
|
+
id: `claim:${manifest.sourceId}:${index + 1}`,
|
|
5059
|
+
text: claim.text,
|
|
5060
|
+
confidence: claim.confidence,
|
|
5061
|
+
status: "extracted",
|
|
5062
|
+
polarity: claim.polarity,
|
|
5063
|
+
citation: manifest.sourceId
|
|
5064
|
+
})),
|
|
5065
|
+
questions: extraction.vision.questions,
|
|
5066
|
+
rationales: [],
|
|
5067
|
+
producedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
5068
|
+
};
|
|
5069
|
+
}
|
|
5070
|
+
function extractionWarningSummary(manifest, extraction) {
|
|
5071
|
+
const warning = extraction?.warnings?.find(Boolean);
|
|
5072
|
+
if (warning) {
|
|
5073
|
+
return `Imported ${manifest.sourceKind} source. ${warning}`;
|
|
5074
|
+
}
|
|
5075
|
+
return `Imported ${manifest.sourceKind} source. Text extraction is not yet available for this source.`;
|
|
5076
|
+
}
|
|
4577
5077
|
async function analyzeSource(manifest, extractedText, provider, paths, schema) {
|
|
4578
|
-
const cachePath =
|
|
5078
|
+
const cachePath = path11.join(paths.analysesDir, `${manifest.sourceId}.json`);
|
|
4579
5079
|
const cached = await readJsonFile(cachePath);
|
|
4580
|
-
if (cached && cached.analysisVersion === ANALYSIS_FORMAT_VERSION && cached.sourceHash === manifest.contentHash && cached.schemaHash === schema.hash) {
|
|
5080
|
+
if (cached && cached.analysisVersion === ANALYSIS_FORMAT_VERSION && cached.sourceHash === manifest.contentHash && cached.extractionHash === manifest.extractionHash && cached.schemaHash === schema.hash) {
|
|
4581
5081
|
return cached;
|
|
4582
5082
|
}
|
|
5083
|
+
const extraction = await readExtractionArtifact(paths.rootDir, manifest);
|
|
4583
5084
|
const content = normalizeWhitespace(extractedText ?? "");
|
|
4584
5085
|
let analysis;
|
|
4585
5086
|
if (manifest.sourceKind === "code" && content) {
|
|
4586
5087
|
analysis = await analyzeCodeSource(manifest, extractedText ?? "", schema.hash);
|
|
5088
|
+
} else if (manifest.sourceKind === "image") {
|
|
5089
|
+
const visionAnalysis = extraction ? analysisFromVisionExtraction(manifest, extraction, schema.hash) : null;
|
|
5090
|
+
if (visionAnalysis) {
|
|
5091
|
+
analysis = visionAnalysis;
|
|
5092
|
+
} else if (!content) {
|
|
5093
|
+
analysis = {
|
|
5094
|
+
analysisVersion: ANALYSIS_FORMAT_VERSION,
|
|
5095
|
+
sourceId: manifest.sourceId,
|
|
5096
|
+
sourceHash: manifest.contentHash,
|
|
5097
|
+
extractionHash: manifest.extractionHash,
|
|
5098
|
+
schemaHash: schema.hash,
|
|
5099
|
+
title: manifest.title,
|
|
5100
|
+
summary: extractionWarningSummary(manifest, extraction),
|
|
5101
|
+
concepts: [],
|
|
5102
|
+
entities: [],
|
|
5103
|
+
claims: [],
|
|
5104
|
+
questions: [],
|
|
5105
|
+
rationales: [],
|
|
5106
|
+
producedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
5107
|
+
};
|
|
5108
|
+
} else if (provider.type === "heuristic") {
|
|
5109
|
+
analysis = heuristicAnalysis(manifest, content, schema.hash);
|
|
5110
|
+
} else {
|
|
5111
|
+
try {
|
|
5112
|
+
analysis = await providerAnalysis(manifest, content, provider, schema);
|
|
5113
|
+
} catch {
|
|
5114
|
+
analysis = heuristicAnalysis(manifest, content, schema.hash);
|
|
5115
|
+
}
|
|
5116
|
+
}
|
|
4587
5117
|
} else if (!content) {
|
|
4588
5118
|
analysis = {
|
|
4589
5119
|
analysisVersion: ANALYSIS_FORMAT_VERSION,
|
|
4590
5120
|
sourceId: manifest.sourceId,
|
|
4591
5121
|
sourceHash: manifest.contentHash,
|
|
5122
|
+
extractionHash: manifest.extractionHash,
|
|
4592
5123
|
schemaHash: schema.hash,
|
|
4593
5124
|
title: manifest.title,
|
|
4594
|
-
summary:
|
|
5125
|
+
summary: extractionWarningSummary(manifest, extraction),
|
|
4595
5126
|
concepts: [],
|
|
4596
5127
|
entities: [],
|
|
4597
5128
|
claims: [],
|
|
@@ -4624,6 +5155,7 @@ var DEFAULT_BENCHMARK_QUESTIONS = [
|
|
|
4624
5155
|
"Where are the biggest knowledge gaps?",
|
|
4625
5156
|
"What evidence should I read first?"
|
|
4626
5157
|
];
|
|
5158
|
+
var RESEARCH_BENCHMARK_QUESTION = "Which research sources should I read first, and why?";
|
|
4627
5159
|
function nodeMap(graph) {
|
|
4628
5160
|
return new Map(graph.nodes.map((node) => [node.id, node]));
|
|
4629
5161
|
}
|
|
@@ -4673,9 +5205,68 @@ function benchmarkQueryTokens(graph, queryResult, pageContentsById) {
|
|
|
4673
5205
|
queryTokens,
|
|
4674
5206
|
reduction: 0,
|
|
4675
5207
|
visitedNodeIds: queryResult.visitedNodeIds,
|
|
5208
|
+
visitedEdgeIds: queryResult.visitedEdgeIds,
|
|
4676
5209
|
pageIds: queryResult.pageIds
|
|
4677
5210
|
};
|
|
4678
5211
|
}
|
|
5212
|
+
function graphHash(graph) {
|
|
5213
|
+
const hashedPages = graph.pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary");
|
|
5214
|
+
const normalized = JSON.stringify(
|
|
5215
|
+
{
|
|
5216
|
+
nodes: [...graph.nodes].map((node) => ({
|
|
5217
|
+
id: node.id,
|
|
5218
|
+
type: node.type,
|
|
5219
|
+
label: node.label,
|
|
5220
|
+
pageId: node.pageId ?? null,
|
|
5221
|
+
communityId: node.communityId ?? null,
|
|
5222
|
+
degree: node.degree ?? null,
|
|
5223
|
+
bridgeScore: node.bridgeScore ?? null,
|
|
5224
|
+
isGodNode: node.isGodNode ?? false,
|
|
5225
|
+
sourceIds: [...node.sourceIds].sort(),
|
|
5226
|
+
projectIds: [...node.projectIds].sort()
|
|
5227
|
+
})).sort((left, right) => left.id.localeCompare(right.id)),
|
|
5228
|
+
edges: [...graph.edges].map((edge) => ({
|
|
5229
|
+
id: edge.id,
|
|
5230
|
+
source: edge.source,
|
|
5231
|
+
target: edge.target,
|
|
5232
|
+
relation: edge.relation,
|
|
5233
|
+
status: edge.status,
|
|
5234
|
+
evidenceClass: edge.evidenceClass,
|
|
5235
|
+
confidence: edge.confidence,
|
|
5236
|
+
provenance: [...edge.provenance].sort()
|
|
5237
|
+
})).sort((left, right) => left.id.localeCompare(right.id)),
|
|
5238
|
+
pages: [...hashedPages].map((page) => ({
|
|
5239
|
+
id: page.id,
|
|
5240
|
+
path: page.path,
|
|
5241
|
+
kind: page.kind,
|
|
5242
|
+
status: page.status,
|
|
5243
|
+
sourceType: page.sourceType ?? null,
|
|
5244
|
+
sourceIds: [...page.sourceIds].sort(),
|
|
5245
|
+
projectIds: [...page.projectIds].sort(),
|
|
5246
|
+
nodeIds: [...page.nodeIds].sort()
|
|
5247
|
+
})).sort((left, right) => left.id.localeCompare(right.id)),
|
|
5248
|
+
communities: [...graph.communities ?? []].map((community) => ({
|
|
5249
|
+
id: community.id,
|
|
5250
|
+
label: community.label,
|
|
5251
|
+
nodeIds: [...community.nodeIds].sort()
|
|
5252
|
+
})).sort((left, right) => left.id.localeCompare(right.id))
|
|
5253
|
+
},
|
|
5254
|
+
null,
|
|
5255
|
+
0
|
|
5256
|
+
);
|
|
5257
|
+
return sha256(normalized);
|
|
5258
|
+
}
|
|
5259
|
+
function hasResearchSources(pages) {
|
|
5260
|
+
return pages.some((page) => page.kind === "source" && Boolean(page.sourceType) && page.sourceType !== "url");
|
|
5261
|
+
}
|
|
5262
|
+
function defaultBenchmarkQuestionsForGraph(graph, maxQuestions = 3) {
|
|
5263
|
+
const normalizedLimit = Math.max(1, Math.min(maxQuestions, DEFAULT_BENCHMARK_QUESTIONS.length));
|
|
5264
|
+
const questions = [...DEFAULT_BENCHMARK_QUESTIONS];
|
|
5265
|
+
if (hasResearchSources(graph.pages)) {
|
|
5266
|
+
questions.unshift(RESEARCH_BENCHMARK_QUESTION);
|
|
5267
|
+
}
|
|
5268
|
+
return uniqueBy(questions, (item) => item).slice(0, normalizedLimit);
|
|
5269
|
+
}
|
|
4679
5270
|
function buildBenchmarkArtifact(input) {
|
|
4680
5271
|
const corpusTokens = Math.max(1, Math.round(input.corpusWords * (100 / 75)));
|
|
4681
5272
|
const perQuestion = input.perQuestion.filter((entry) => entry.queryTokens > 0).map((entry) => ({
|
|
@@ -4684,8 +5275,18 @@ function buildBenchmarkArtifact(input) {
|
|
|
4684
5275
|
}));
|
|
4685
5276
|
const avgQueryTokens = perQuestion.length ? Math.max(1, Math.round(perQuestion.reduce((total, entry) => total + entry.queryTokens, 0) / perQuestion.length)) : 0;
|
|
4686
5277
|
const reductionRatio = avgQueryTokens ? Number(Math.max(0, 1 - avgQueryTokens / Math.max(1, corpusTokens)).toFixed(3)) : 0;
|
|
5278
|
+
const uniqueVisitedNodes = new Set(perQuestion.flatMap((entry) => entry.visitedNodeIds)).size;
|
|
5279
|
+
const summary = {
|
|
5280
|
+
questionCount: input.questions.length,
|
|
5281
|
+
uniqueVisitedNodes,
|
|
5282
|
+
finalContextTokens: avgQueryTokens,
|
|
5283
|
+
naiveCorpusTokens: corpusTokens,
|
|
5284
|
+
avgReduction: reductionRatio,
|
|
5285
|
+
reductionRatio
|
|
5286
|
+
};
|
|
4687
5287
|
return {
|
|
4688
5288
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5289
|
+
graphHash: graphHash(input.graph),
|
|
4689
5290
|
corpusWords: input.corpusWords,
|
|
4690
5291
|
corpusTokens,
|
|
4691
5292
|
nodes: input.graph.nodes.length,
|
|
@@ -4693,7 +5294,9 @@ function buildBenchmarkArtifact(input) {
|
|
|
4693
5294
|
avgQueryTokens,
|
|
4694
5295
|
reductionRatio,
|
|
4695
5296
|
sampleQuestions: input.questions,
|
|
4696
|
-
perQuestion
|
|
5297
|
+
perQuestion,
|
|
5298
|
+
questionResults: perQuestion,
|
|
5299
|
+
summary
|
|
4697
5300
|
};
|
|
4698
5301
|
}
|
|
4699
5302
|
|
|
@@ -4714,10 +5317,10 @@ function conflictConfidence(claimA, claimB) {
|
|
|
4714
5317
|
}
|
|
4715
5318
|
|
|
4716
5319
|
// src/deep-lint.ts
|
|
4717
|
-
import
|
|
4718
|
-
import
|
|
4719
|
-
import
|
|
4720
|
-
import { z as
|
|
5320
|
+
import fs11 from "fs/promises";
|
|
5321
|
+
import path14 from "path";
|
|
5322
|
+
import matter4 from "gray-matter";
|
|
5323
|
+
import { z as z5 } from "zod";
|
|
4721
5324
|
|
|
4722
5325
|
// src/findings.ts
|
|
4723
5326
|
function normalizeFindingSeverity(value) {
|
|
@@ -4736,25 +5339,25 @@ function normalizeFindingSeverity(value) {
|
|
|
4736
5339
|
|
|
4737
5340
|
// src/orchestration.ts
|
|
4738
5341
|
import { spawn } from "child_process";
|
|
4739
|
-
import
|
|
4740
|
-
import { z as
|
|
4741
|
-
var orchestrationRoleResultSchema =
|
|
4742
|
-
summary:
|
|
4743
|
-
findings:
|
|
4744
|
-
|
|
4745
|
-
severity:
|
|
4746
|
-
message:
|
|
4747
|
-
relatedPageIds:
|
|
4748
|
-
relatedSourceIds:
|
|
4749
|
-
suggestedQuery:
|
|
5342
|
+
import path12 from "path";
|
|
5343
|
+
import { z as z3 } from "zod";
|
|
5344
|
+
// Shape of one finding reported by an orchestration role.
// `severity` falls back to "info" when the role omits it.
var orchestrationRoleFindingSchema = z3.object({
  severity: z3.string().optional().default("info"),
  message: z3.string().min(1),
  relatedPageIds: z3.array(z3.string()).optional(),
  relatedSourceIds: z3.array(z3.string()).optional(),
  suggestedQuery: z3.string().optional()
});
// Shape of one proposal reported by an orchestration role; all three fields must be non-empty strings.
var orchestrationRoleProposalSchema = z3.object({
  path: z3.string().min(1),
  content: z3.string().min(1),
  reason: z3.string().min(1)
});
// Validates the result object produced by an orchestration role.
// `findings`, `questions` and `proposals` each default to an empty array when absent.
var orchestrationRoleResultSchema = z3.object({
  summary: z3.string().optional(),
  findings: z3.array(orchestrationRoleFindingSchema).default([]),
  questions: z3.array(z3.string().min(1)).default([]),
  proposals: z3.array(orchestrationRoleProposalSchema).default([])
});
|
|
@@ -4829,7 +5432,7 @@ async function runProviderRole(rootDir, role, roleConfig, input) {
|
|
|
4829
5432
|
}
|
|
4830
5433
|
async function runCommandRole(rootDir, role, executor, input) {
|
|
4831
5434
|
const [command, ...args] = executor.command;
|
|
4832
|
-
const cwd = executor.cwd ?
|
|
5435
|
+
const cwd = executor.cwd ? path12.resolve(rootDir, executor.cwd) : rootDir;
|
|
4833
5436
|
const child = spawn(command, args, {
|
|
4834
5437
|
cwd,
|
|
4835
5438
|
env: {
|
|
@@ -4923,9 +5526,9 @@ function summarizeRoleQuestions(results) {
|
|
|
4923
5526
|
}
|
|
4924
5527
|
|
|
4925
5528
|
// src/web-search/registry.ts
|
|
4926
|
-
import
|
|
5529
|
+
import path13 from "path";
|
|
4927
5530
|
import { pathToFileURL } from "url";
|
|
4928
|
-
import { z as
|
|
5531
|
+
import { z as z4 } from "zod";
|
|
4929
5532
|
|
|
4930
5533
|
// src/web-search/http-json.ts
|
|
4931
5534
|
function deepGet(value, pathValue) {
|
|
@@ -5007,10 +5610,10 @@ var HttpJsonWebSearchAdapter = class {
|
|
|
5007
5610
|
};
|
|
5008
5611
|
|
|
5009
5612
|
// src/web-search/registry.ts
|
|
5010
|
-
var customWebSearchModuleSchema =
|
|
5011
|
-
createAdapter:
|
|
5012
|
-
input: [
|
|
5013
|
-
output:
|
|
5613
|
+
// Signature required of a custom module's `createAdapter` export:
// (string, <unchecked>, string) => Promise<unchecked>.
var customWebSearchCreateAdapterSchema = z4.function({
  input: [z4.string(), z4.custom(), z4.string()],
  output: z4.promise(z4.custom())
});
// Validates the namespace object of a dynamically imported custom web-search module.
var customWebSearchModuleSchema = z4.object({
  createAdapter: customWebSearchCreateAdapterSchema
});
|
|
5016
5619
|
async function createWebSearchAdapter(id, config, rootDir) {
|
|
@@ -5021,7 +5624,7 @@ async function createWebSearchAdapter(id, config, rootDir) {
|
|
|
5021
5624
|
if (!config.module) {
|
|
5022
5625
|
throw new Error(`Web search provider ${id} is type "custom" but no module path was configured.`);
|
|
5023
5626
|
}
|
|
5024
|
-
const resolvedModule =
|
|
5627
|
+
const resolvedModule = path13.isAbsolute(config.module) ? config.module : path13.resolve(rootDir, config.module);
|
|
5025
5628
|
const loaded = await import(pathToFileURL(resolvedModule).href);
|
|
5026
5629
|
const parsed = customWebSearchModuleSchema.parse(loaded);
|
|
5027
5630
|
return parsed.createAdapter(id, config, rootDir);
|
|
@@ -5045,15 +5648,15 @@ async function getWebSearchAdapterForTask(rootDir, task) {
|
|
|
5045
5648
|
}
|
|
5046
5649
|
|
|
5047
5650
|
// src/deep-lint.ts
|
|
5048
|
-
var deepLintResponseSchema =
|
|
5049
|
-
findings:
|
|
5050
|
-
|
|
5051
|
-
severity:
|
|
5052
|
-
code:
|
|
5053
|
-
message:
|
|
5054
|
-
relatedSourceIds:
|
|
5055
|
-
relatedPageIds:
|
|
5056
|
-
suggestedQuery:
|
|
5651
|
+
// One deep-lint finding. `code` is restricted to the five categories listed below;
// `severity` defaults to "info" and both related-id lists default to empty arrays.
var deepLintFindingSchema = z5.object({
  severity: z5.string().optional().default("info"),
  code: z5.enum([
    "coverage_gap",
    "contradiction_candidate",
    "missing_citation",
    "candidate_page",
    "follow_up_question"
  ]),
  message: z5.string().min(1),
  relatedSourceIds: z5.array(z5.string()).default([]),
  relatedPageIds: z5.array(z5.string()).default([]),
  suggestedQuery: z5.string().optional()
});
// Validates a deep-lint response: a required `findings` array holding at most 20 entries.
var deepLintResponseSchema = z5.object({
  findings: z5.array(deepLintFindingSchema).max(20)
});
|
|
@@ -5081,9 +5684,9 @@ async function loadContextPages(rootDir, graph) {
|
|
|
5081
5684
|
);
|
|
5082
5685
|
return Promise.all(
|
|
5083
5686
|
contextPages.slice(0, 18).map(async (page) => {
|
|
5084
|
-
const absolutePath =
|
|
5085
|
-
const raw = await
|
|
5086
|
-
const parsed =
|
|
5687
|
+
const absolutePath = path14.join(paths.wikiDir, page.path);
|
|
5688
|
+
const raw = await fs11.readFile(absolutePath, "utf8").catch(() => "");
|
|
5689
|
+
const parsed = matter4(raw);
|
|
5087
5690
|
return {
|
|
5088
5691
|
id: page.id,
|
|
5089
5692
|
title: page.title,
|
|
@@ -5130,7 +5733,7 @@ function heuristicDeepFindings(contextPages, structuralFindings, graph) {
|
|
|
5130
5733
|
code: "missing_citation",
|
|
5131
5734
|
message: finding.message,
|
|
5132
5735
|
pagePath: finding.pagePath,
|
|
5133
|
-
suggestedQuery: finding.pagePath ? `Which sources support the claims in ${
|
|
5736
|
+
suggestedQuery: finding.pagePath ? `Which sources support the claims in ${path14.basename(finding.pagePath, ".md")}?` : void 0
|
|
5134
5737
|
});
|
|
5135
5738
|
}
|
|
5136
5739
|
for (const page of contextPages.filter((item) => item.kind === "source").slice(0, 3)) {
|
|
@@ -5611,12 +6214,15 @@ function topGodNodes(graph, limit = 10) {
|
|
|
5611
6214
|
}
|
|
5612
6215
|
|
|
5613
6216
|
// src/markdown.ts
|
|
5614
|
-
import
|
|
5615
|
-
function
|
|
6217
|
+
import matter5 from "gray-matter";
|
|
6218
|
+
/**
 * Drop falsy entries from `values`, then de-duplicate what remains through
 * `uniqueBy`, using each entry itself as the identity key.
 *
 * @param {Array} values - Candidate values; falsy entries (e.g. "" or undefined) are discarded.
 * @returns {Array} Whatever `uniqueBy` returns for the truthy entries.
 */
function uniqueStrings2(values) {
  const truthyValues = values.filter((entry) => Boolean(entry));
  return uniqueBy(truthyValues, (entry) => entry);
}
|
|
6221
|
+
/**
 * Return a JSON round-tripped copy of `value`.
 *
 * Because the copy goes through `JSON.stringify`, properties whose value is
 * `undefined` are omitted and `undefined` array slots become `null`.
 *
 * @param {*} value - JSON-serializable frontmatter data.
 * @returns {*} A fresh structure parsed from the JSON text of `value`.
 */
function safeFrontmatter(value) {
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}
|
|
5618
6224
|
function decoratedTags(baseTags, decorations) {
|
|
5619
|
-
return
|
|
6225
|
+
return uniqueStrings2([
|
|
5620
6226
|
...baseTags,
|
|
5621
6227
|
...(decorations?.projectIds ?? []).map((projectId) => `project/${projectId}`),
|
|
5622
6228
|
...decorations?.extraTags ?? []
|
|
@@ -5695,6 +6301,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
5695
6301
|
page_id: pageId,
|
|
5696
6302
|
kind: "source",
|
|
5697
6303
|
title: analysis.title,
|
|
6304
|
+
...manifest.sourceType ? { source_type: manifest.sourceType } : {},
|
|
5698
6305
|
tags: decoratedTags(analysis.code ? ["source", "code"] : ["source"], decorations),
|
|
5699
6306
|
source_ids: [manifest.sourceId],
|
|
5700
6307
|
project_ids: decorations?.projectIds ?? [],
|
|
@@ -5717,6 +6324,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
5717
6324
|
"",
|
|
5718
6325
|
`Source ID: \`${manifest.sourceId}\``,
|
|
5719
6326
|
manifest.url ? `Source URL: ${manifest.url}` : `Source Path: \`${manifest.originalPath ?? manifest.storedPath}\``,
|
|
6327
|
+
...manifest.sourceType ? [`Source Type: \`${manifest.sourceType}\``, ""] : [""],
|
|
5720
6328
|
"",
|
|
5721
6329
|
"## Summary",
|
|
5722
6330
|
"",
|
|
@@ -5761,6 +6369,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
5761
6369
|
path: relativePath,
|
|
5762
6370
|
title: analysis.title,
|
|
5763
6371
|
kind: "source",
|
|
6372
|
+
sourceType: manifest.sourceType,
|
|
5764
6373
|
sourceIds: [manifest.sourceId],
|
|
5765
6374
|
projectIds: decorations?.projectIds ?? [],
|
|
5766
6375
|
nodeIds,
|
|
@@ -5778,7 +6387,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
5778
6387
|
compiledFrom: metadata.compiledFrom,
|
|
5779
6388
|
managedBy: metadata.managedBy
|
|
5780
6389
|
},
|
|
5781
|
-
content:
|
|
6390
|
+
content: matter5.stringify(body, safeFrontmatter(frontmatter))
|
|
5782
6391
|
};
|
|
5783
6392
|
}
|
|
5784
6393
|
function buildModulePage(input) {
|
|
@@ -5793,7 +6402,7 @@ function buildModulePage(input) {
|
|
|
5793
6402
|
const nodeIds = [code.moduleId, ...code.symbols.map((symbol) => symbol.id)];
|
|
5794
6403
|
const localModuleBacklinks = input.localModules.map((moduleRef) => moduleRef.page.id);
|
|
5795
6404
|
const relatedOutputs = input.relatedOutputs ?? [];
|
|
5796
|
-
const backlinks =
|
|
6405
|
+
const backlinks = uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]);
|
|
5797
6406
|
const importsSection = code.imports.length ? code.imports.map((item) => {
|
|
5798
6407
|
const localModule = item.resolvedSourceId ? input.localModules.find((moduleRef) => moduleRef.sourceId === item.resolvedSourceId && moduleRef.reExport === item.reExport) : void 0;
|
|
5799
6408
|
const importedBits = [
|
|
@@ -5839,9 +6448,9 @@ function buildModulePage(input) {
|
|
|
5839
6448
|
source_hashes: {
|
|
5840
6449
|
[manifest.sourceId]: manifest.contentHash
|
|
5841
6450
|
},
|
|
5842
|
-
related_page_ids:
|
|
6451
|
+
related_page_ids: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
|
|
5843
6452
|
related_node_ids: [],
|
|
5844
|
-
related_source_ids:
|
|
6453
|
+
related_source_ids: uniqueStrings2([
|
|
5845
6454
|
manifest.sourceId,
|
|
5846
6455
|
...input.localModules.map((moduleRef) => moduleRef.sourceId),
|
|
5847
6456
|
...relatedOutputs.flatMap((page) => page.sourceIds)
|
|
@@ -5913,9 +6522,9 @@ function buildModulePage(input) {
|
|
|
5913
6522
|
backlinks,
|
|
5914
6523
|
schemaHash,
|
|
5915
6524
|
sourceHashes: { [manifest.sourceId]: manifest.contentHash },
|
|
5916
|
-
relatedPageIds:
|
|
6525
|
+
relatedPageIds: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
|
|
5917
6526
|
relatedNodeIds: [],
|
|
5918
|
-
relatedSourceIds:
|
|
6527
|
+
relatedSourceIds: uniqueStrings2([
|
|
5919
6528
|
manifest.sourceId,
|
|
5920
6529
|
...input.localModules.map((moduleRef) => moduleRef.sourceId),
|
|
5921
6530
|
...relatedOutputs.flatMap((page) => page.sourceIds)
|
|
@@ -5925,7 +6534,7 @@ function buildModulePage(input) {
|
|
|
5925
6534
|
compiledFrom: metadata.compiledFrom,
|
|
5926
6535
|
managedBy: metadata.managedBy
|
|
5927
6536
|
},
|
|
5928
|
-
content:
|
|
6537
|
+
content: matter5.stringify(body, frontmatter)
|
|
5929
6538
|
};
|
|
5930
6539
|
}
|
|
5931
6540
|
function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes, schemaHash, metadata, relativePath, relatedOutputs = [], decorations) {
|
|
@@ -5996,7 +6605,7 @@ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHash
|
|
|
5996
6605
|
compiledFrom: metadata.compiledFrom,
|
|
5997
6606
|
managedBy: metadata.managedBy
|
|
5998
6607
|
},
|
|
5999
|
-
content:
|
|
6608
|
+
content: matter5.stringify(body, frontmatter)
|
|
6000
6609
|
};
|
|
6001
6610
|
}
|
|
6002
6611
|
function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
|
|
@@ -6072,7 +6681,7 @@ function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
|
|
|
6072
6681
|
}
|
|
6073
6682
|
function buildSectionIndex(kind, pages, schemaHash, metadata, projectIds = []) {
|
|
6074
6683
|
const title = kind.charAt(0).toUpperCase() + kind.slice(1);
|
|
6075
|
-
return
|
|
6684
|
+
return matter5.stringify(
|
|
6076
6685
|
[`# ${title}`, "", ...pages.map((page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`), ""].join("\n"),
|
|
6077
6686
|
{
|
|
6078
6687
|
page_id: `${kind}:index`,
|
|
@@ -6114,27 +6723,118 @@ function crossCommunityEdges(graph) {
|
|
|
6114
6723
|
/**
 * Derive up to six follow-up questions from the graph.
 *
 * One question is produced per community with two or fewer nodes, followed by
 * one per node among the three highest positive bridge scores. The combined
 * list is passed through `uniqueStrings2` and truncated to six entries.
 *
 * @param {object} graph - Graph with `nodes` and an optional `communities` list.
 * @returns {string[]} At most six question strings.
 */
function suggestedGraphQuestions(graph) {
  const bridgeScoreOf = (node) => node.bridgeScore ?? 0;
  const communityQuestions = (graph.communities ?? [])
    .filter((community) => community.nodeIds.length <= 2)
    .map((community) => `What sources would strengthen community ${community.label}?`);
  const bridgeQuestions = graph.nodes
    .filter((node) => bridgeScoreOf(node) > 0)
    .sort((left, right) => bridgeScoreOf(right) - bridgeScoreOf(left))
    .slice(0, 3)
    .map((node) => `Why does ${node.label} connect multiple communities in the vault?`);
  const combined = [...communityQuestions, ...bridgeQuestions];
  return uniqueStrings2(combined).slice(0, 6);
}
|
|
6731
|
+
/**
 * Build the structured graph-report artifact from a compiled graph.
 *
 * Reads `input.graph`, `input.communityPages`, `input.graphHash`, and the
 * optional `input.benchmark`, `input.benchmarkStale` and
 * `input.recentResearchSources`.
 *
 * @param {object} input - Report inputs as described above.
 * @returns {object} Artifact with overview counts, an optional benchmark digest,
 *   up to 8 god nodes (by degree), up to 8 bridge nodes (by bridge score),
 *   communities with two or fewer nodes, up to 8 cross-community connections,
 *   suggested questions, community page references and recent research sources.
 */
function buildGraphReportArtifact(input) {
  const { graph, communityPages } = input;
  const communities = graph.communities ?? [];
  const nodesById = new Map(graph.nodes.map((node) => [node.id, node]));

  const degreeOf = (node) => node.degree ?? 0;
  const bridgeScoreOf = (node) => node.bridgeScore ?? 0;
  // Projection shared by the god-node and bridge-node lists.
  const toNodeEntry = (node) => ({
    nodeId: node.id,
    label: node.label,
    pageId: node.pageId,
    degree: node.degree,
    bridgeScore: node.bridgeScore
  });
  // Community record a node belongs to, or undefined when the node is missing or has no communityId.
  const communityOf = (node) => {
    if (!node?.communityId) {
      return void 0;
    }
    return communities.find((community) => community.id === node.communityId);
  };

  const godNodes = graph.nodes
    .filter((node) => node.isGodNode)
    .sort((left, right) => degreeOf(right) - degreeOf(left))
    .slice(0, 8);
  const bridgeNodes = graph.nodes
    .filter((node) => bridgeScoreOf(node) > 0)
    .sort((left, right) => bridgeScoreOf(right) - bridgeScoreOf(left))
    .slice(0, 8);

  const thinCommunities = communities
    .filter((community) => community.nodeIds.length <= 2)
    .map((community) => {
      // Community summary pages are looked up by the id `graph:<community id>`.
      const page = communityPages.find((candidate) => candidate.id === `graph:${community.id}`);
      return {
        id: community.id,
        label: community.label,
        nodeCount: community.nodeIds.length,
        pageId: page?.id,
        path: page?.path,
        title: page?.title
      };
    });

  const surprisingConnections = crossCommunityEdges(graph)
    .slice(0, 8)
    .map((edge) => {
      const source = nodesById.get(edge.source);
      const target = nodesById.get(edge.target);
      const route = shortestGraphPath(graph, edge.source, edge.target);
      const sourceCommunity = communityOf(source);
      const targetCommunity = communityOf(target);
      // Fall back to the raw node id when the node is not present in the graph.
      const sourceLabel = source?.label ?? edge.source;
      const targetLabel = target?.label ?? edge.target;
      const sentenceParts = [
        `${sourceLabel} links ${sourceCommunity?.label ? `from ${sourceCommunity.label}` : ""}`.trim(),
        `to ${targetLabel}${targetCommunity?.label ? ` in ${targetCommunity.label}` : ""}`.trim(),
        `through ${edge.relation} with ${edge.evidenceClass} evidence at ${edge.confidence.toFixed(2)} confidence.`
      ];
      return {
        id: edge.id,
        sourceNodeId: edge.source,
        sourceLabel,
        targetNodeId: edge.target,
        targetLabel,
        relation: edge.relation,
        evidenceClass: edge.evidenceClass,
        confidence: edge.confidence,
        pathNodeIds: route.nodeIds,
        pathEdgeIds: route.edgeIds,
        pathSummary: route.summary,
        explanation: normalizeWhitespace(sentenceParts.join(" "))
      };
    });

  // The benchmark digest stays undefined when no benchmark was supplied.
  let benchmark;
  if (input.benchmark) {
    benchmark = {
      generatedAt: input.benchmark.generatedAt,
      stale: input.benchmarkStale ?? false,
      summary: input.benchmark.summary,
      questionCount: input.benchmark.sampleQuestions.length
    };
  }

  return {
    generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
    graphHash: input.graphHash,
    overview: {
      nodes: graph.nodes.length,
      edges: graph.edges.length,
      pages: graph.pages.length,
      communities: graph.communities?.length ?? 0
    },
    benchmark,
    godNodes: godNodes.map(toNodeEntry),
    bridgeNodes: bridgeNodes.map(toNodeEntry),
    thinCommunities,
    surprisingConnections,
    suggestedQuestions: suggestedGraphQuestions(graph),
    communityPages: communityPages.map((page) => ({
      id: page.id,
      path: page.path,
      title: page.title
    })),
    recentResearchSources: (input.recentResearchSources ?? []).map((page) => ({
      pageId: page.id,
      path: page.path,
      title: page.title,
      sourceType: page.sourceType,
      updatedAt: page.updatedAt
    }))
  };
}
|
|
6122
6819
|
function buildGraphReportPage(input) {
|
|
6123
6820
|
const pageId = "graph:report";
|
|
6124
6821
|
const pathValue = pagePathFor("graph_report", "report");
|
|
6125
6822
|
const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
|
|
6126
6823
|
const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
|
|
6127
|
-
const
|
|
6128
|
-
|
|
6129
|
-
|
|
6130
|
-
|
|
6131
|
-
const
|
|
6132
|
-
|
|
6133
|
-
...
|
|
6134
|
-
...
|
|
6135
|
-
...input.
|
|
6824
|
+
const relatedNodeIds = uniqueStrings2([
|
|
6825
|
+
...input.report.godNodes.map((node) => node.nodeId),
|
|
6826
|
+
...input.report.bridgeNodes.map((node) => node.nodeId)
|
|
6827
|
+
]);
|
|
6828
|
+
const relatedPageIds = uniqueStrings2([
|
|
6829
|
+
...input.report.godNodes.map((node) => node.pageId ?? ""),
|
|
6830
|
+
...input.report.bridgeNodes.map((node) => node.pageId ?? ""),
|
|
6831
|
+
...input.report.communityPages.map((page) => page.id),
|
|
6832
|
+
...input.report.recentResearchSources.map((page) => page.pageId)
|
|
6833
|
+
]);
|
|
6834
|
+
const relatedSourceIds = uniqueStrings2([
|
|
6835
|
+
...relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []),
|
|
6836
|
+
...input.report.recentResearchSources.flatMap((page) => pagesById.get(page.pageId)?.sourceIds ?? [])
|
|
6136
6837
|
]);
|
|
6137
|
-
const relatedSourceIds = uniqueStrings(relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []));
|
|
6138
6838
|
const frontmatter = {
|
|
6139
6839
|
page_id: pageId,
|
|
6140
6840
|
kind: "graph_report",
|
|
@@ -6162,47 +6862,66 @@ function buildGraphReportPage(input) {
|
|
|
6162
6862
|
"",
|
|
6163
6863
|
"## Overview",
|
|
6164
6864
|
"",
|
|
6165
|
-
`- Nodes: ${input.
|
|
6166
|
-
`- Edges: ${input.
|
|
6167
|
-
`- Pages: ${input.
|
|
6168
|
-
`- Communities: ${input.
|
|
6865
|
+
`- Nodes: ${input.report.overview.nodes}`,
|
|
6866
|
+
`- Edges: ${input.report.overview.edges}`,
|
|
6867
|
+
`- Pages: ${input.report.overview.pages}`,
|
|
6868
|
+
`- Communities: ${input.report.overview.communities}`,
|
|
6169
6869
|
"",
|
|
6170
|
-
|
|
6171
|
-
|
|
6172
|
-
|
|
6173
|
-
`-
|
|
6174
|
-
`-
|
|
6175
|
-
`-
|
|
6176
|
-
`-
|
|
6870
|
+
"## Benchmark Summary",
|
|
6871
|
+
"",
|
|
6872
|
+
...input.report.benchmark ? [
|
|
6873
|
+
`- Generated At: ${input.report.benchmark.generatedAt}`,
|
|
6874
|
+
`- Status: ${input.report.benchmark.stale ? "Stale (graph changed since benchmark ran)" : "Fresh"}`,
|
|
6875
|
+
`- Naive Corpus Tokens: ${input.report.benchmark.summary.naiveCorpusTokens}`,
|
|
6876
|
+
`- Final Context Tokens: ${input.report.benchmark.summary.finalContextTokens}`,
|
|
6877
|
+
`- Unique Nodes Considered: ${input.report.benchmark.summary.uniqueVisitedNodes}`,
|
|
6878
|
+
`- Reduction Ratio: ${(input.report.benchmark.summary.reductionRatio * 100).toFixed(1)}%`,
|
|
6879
|
+
`- Questions: ${input.report.benchmark.questionCount}`,
|
|
6177
6880
|
""
|
|
6178
|
-
] : [],
|
|
6179
|
-
"## God Nodes",
|
|
6881
|
+
] : ["- No benchmark results yet.", ""],
|
|
6882
|
+
"## Top God Nodes",
|
|
6180
6883
|
"",
|
|
6181
|
-
...godNodes.length ? godNodes.map((node) =>
|
|
6884
|
+
...input.report.godNodes.length ? input.report.godNodes.map((node) => {
|
|
6885
|
+
const graphNode = nodesById.get(node.nodeId);
|
|
6886
|
+
return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
|
|
6887
|
+
}) : ["- No high-connectivity nodes detected."],
|
|
6182
6888
|
"",
|
|
6183
|
-
"## Bridge Nodes",
|
|
6889
|
+
"## Top Bridge Nodes",
|
|
6184
6890
|
"",
|
|
6185
|
-
...bridgeNodes.length ? bridgeNodes.map((node) =>
|
|
6891
|
+
...input.report.bridgeNodes.length ? input.report.bridgeNodes.map((node) => {
|
|
6892
|
+
const graphNode = nodesById.get(node.nodeId);
|
|
6893
|
+
return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
|
|
6894
|
+
}) : ["- No cross-community bridge nodes detected."],
|
|
6186
6895
|
"",
|
|
6187
6896
|
"## Communities",
|
|
6188
6897
|
"",
|
|
6189
|
-
...input.communityPages.length ? input.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
|
|
6898
|
+
...input.report.communityPages.length ? input.report.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
|
|
6190
6899
|
"",
|
|
6191
|
-
"## Thin
|
|
6900
|
+
"## Thin Or Underlinked Areas",
|
|
6192
6901
|
"",
|
|
6193
|
-
...thinCommunities.length ? thinCommunities.map(
|
|
6902
|
+
...input.report.thinCommunities.length ? input.report.thinCommunities.map(
|
|
6903
|
+
(community) => community.path ? `- [[${community.path.replace(/\.md$/, "")}|${community.title ?? community.label}]] (${community.nodeCount} node(s))` : `- ${community.label} (${community.nodeCount} node(s))`
|
|
6904
|
+
) : ["- No thin communities detected."],
|
|
6194
6905
|
"",
|
|
6195
|
-
"##
|
|
6906
|
+
"## Surprising Connections",
|
|
6196
6907
|
"",
|
|
6197
|
-
...
|
|
6198
|
-
const source = nodesById.get(
|
|
6199
|
-
const target = nodesById.get(
|
|
6200
|
-
|
|
6908
|
+
...input.report.surprisingConnections.length ? input.report.surprisingConnections.map((connection) => {
|
|
6909
|
+
const source = nodesById.get(connection.sourceNodeId);
|
|
6910
|
+
const target = nodesById.get(connection.targetNodeId);
|
|
6911
|
+
const sourceLabel = source ? graphNodeLink(source, pagesById) : `\`${connection.sourceNodeId}\``;
|
|
6912
|
+
const targetLabel = target ? graphNodeLink(target, pagesById) : `\`${connection.targetNodeId}\``;
|
|
6913
|
+
return `- ${sourceLabel} ${connection.relation} ${targetLabel} (${connection.evidenceClass}, ${connection.confidence.toFixed(2)}). ${connection.explanation} Path: ${connection.pathSummary}.`;
|
|
6201
6914
|
}) : ["- No cross-community links detected."],
|
|
6202
6915
|
"",
|
|
6203
|
-
"##
|
|
6916
|
+
"## New Research Sources",
|
|
6917
|
+
"",
|
|
6918
|
+
...input.report.recentResearchSources.length ? input.report.recentResearchSources.map(
|
|
6919
|
+
(page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]] (\`${page.sourceType}\`, updated ${page.updatedAt})`
|
|
6920
|
+
) : ["- No newly captured research sources since the previous compile."],
|
|
6921
|
+
"",
|
|
6922
|
+
"## Suggested Questions",
|
|
6204
6923
|
"",
|
|
6205
|
-
...
|
|
6924
|
+
...input.report.suggestedQuestions.map((question) => `- ${question}`),
|
|
6206
6925
|
""
|
|
6207
6926
|
].join("\n");
|
|
6208
6927
|
return {
|
|
@@ -6228,7 +6947,7 @@ function buildGraphReportPage(input) {
|
|
|
6228
6947
|
compiledFrom: input.metadata.compiledFrom,
|
|
6229
6948
|
managedBy: input.metadata.managedBy
|
|
6230
6949
|
},
|
|
6231
|
-
content:
|
|
6950
|
+
content: matter5.stringify(body, frontmatter)
|
|
6232
6951
|
};
|
|
6233
6952
|
}
|
|
6234
6953
|
function buildCommunitySummaryPage(input) {
|
|
@@ -6237,14 +6956,14 @@ function buildCommunitySummaryPage(input) {
|
|
|
6237
6956
|
const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
|
|
6238
6957
|
const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
|
|
6239
6958
|
const communityNodes = input.community.nodeIds.map((nodeId) => nodesById.get(nodeId)).filter((node) => Boolean(node));
|
|
6240
|
-
const communityPageIds =
|
|
6959
|
+
const communityPageIds = uniqueStrings2(communityNodes.map((node) => node.pageId ?? ""));
|
|
6241
6960
|
const communityPages = communityPageIds.map((id) => pagesById.get(id)).filter((page) => Boolean(page));
|
|
6242
6961
|
const externalEdges = input.graph.edges.filter((edge) => {
|
|
6243
6962
|
const source = nodesById.get(edge.source);
|
|
6244
6963
|
const target = nodesById.get(edge.target);
|
|
6245
6964
|
return source?.communityId === input.community.id && target?.communityId && target.communityId !== input.community.id;
|
|
6246
6965
|
}).slice(0, 8);
|
|
6247
|
-
const relatedSourceIds =
|
|
6966
|
+
const relatedSourceIds = uniqueStrings2(communityNodes.flatMap((node) => node.sourceIds));
|
|
6248
6967
|
const frontmatter = {
|
|
6249
6968
|
page_id: pageId,
|
|
6250
6969
|
kind: "community_summary",
|
|
@@ -6263,7 +6982,7 @@ function buildCommunitySummaryPage(input) {
|
|
|
6263
6982
|
backlinks: ["graph:report"],
|
|
6264
6983
|
schema_hash: input.schemaHash,
|
|
6265
6984
|
source_hashes: {},
|
|
6266
|
-
related_page_ids:
|
|
6985
|
+
related_page_ids: uniqueStrings2(["graph:report", ...communityPageIds]),
|
|
6267
6986
|
related_node_ids: input.community.nodeIds,
|
|
6268
6987
|
related_source_ids: relatedSourceIds
|
|
6269
6988
|
};
|
|
@@ -6302,7 +7021,7 @@ function buildCommunitySummaryPage(input) {
|
|
|
6302
7021
|
backlinks: ["graph:report"],
|
|
6303
7022
|
schemaHash: input.schemaHash,
|
|
6304
7023
|
sourceHashes: {},
|
|
6305
|
-
relatedPageIds:
|
|
7024
|
+
relatedPageIds: uniqueStrings2(["graph:report", ...communityPageIds]),
|
|
6306
7025
|
relatedNodeIds: input.community.nodeIds,
|
|
6307
7026
|
relatedSourceIds,
|
|
6308
7027
|
createdAt: input.metadata.createdAt,
|
|
@@ -6310,11 +7029,11 @@ function buildCommunitySummaryPage(input) {
|
|
|
6310
7029
|
compiledFrom: input.metadata.compiledFrom,
|
|
6311
7030
|
managedBy: input.metadata.managedBy
|
|
6312
7031
|
},
|
|
6313
|
-
content:
|
|
7032
|
+
content: matter5.stringify(body, frontmatter)
|
|
6314
7033
|
};
|
|
6315
7034
|
}
|
|
6316
7035
|
function buildProjectsIndex(projectPages, schemaHash, metadata) {
|
|
6317
|
-
return
|
|
7036
|
+
return matter5.stringify(
|
|
6318
7037
|
[
|
|
6319
7038
|
"# Projects",
|
|
6320
7039
|
"",
|
|
@@ -6344,7 +7063,7 @@ function buildProjectsIndex(projectPages, schemaHash, metadata) {
|
|
|
6344
7063
|
}
|
|
6345
7064
|
function buildProjectIndex(input) {
|
|
6346
7065
|
const title = `Project: ${input.projectId}`;
|
|
6347
|
-
return
|
|
7066
|
+
return matter5.stringify(
|
|
6348
7067
|
[
|
|
6349
7068
|
`# ${title}`,
|
|
6350
7069
|
"",
|
|
@@ -6457,7 +7176,7 @@ function buildOutputPage(input) {
|
|
|
6457
7176
|
outputFormat: input.outputFormat,
|
|
6458
7177
|
outputAssets
|
|
6459
7178
|
},
|
|
6460
|
-
content:
|
|
7179
|
+
content: matter5.stringify(
|
|
6461
7180
|
(input.outputFormat === "slides" ? [
|
|
6462
7181
|
input.answer,
|
|
6463
7182
|
"",
|
|
@@ -6583,7 +7302,7 @@ function buildExploreHubPage(input) {
|
|
|
6583
7302
|
outputFormat: input.outputFormat,
|
|
6584
7303
|
outputAssets
|
|
6585
7304
|
},
|
|
6586
|
-
content:
|
|
7305
|
+
content: matter5.stringify(
|
|
6587
7306
|
(input.outputFormat === "slides" ? [
|
|
6588
7307
|
`# ${title}`,
|
|
6589
7308
|
"",
|
|
@@ -6653,49 +7372,49 @@ function buildExploreHubPage(input) {
|
|
|
6653
7372
|
}
|
|
6654
7373
|
|
|
6655
7374
|
// src/output-artifacts.ts
|
|
6656
|
-
import { z as
|
|
7375
|
+
import { z as z6 } from "zod";
|
|
6657
7376
|
/**
 * Escape the five XML special characters so `value` can be embedded in
 * element text or in a quoted attribute value.
 *
 * @param {string} value - Raw text to escape.
 * @returns {string} `value` with `&`, `<`, `>`, `"` and `'` replaced by the
 *   predefined XML entity references.
 */
function escapeXml(value) {
  // `&` is replaced first so the ampersands introduced by the later replacements are not escaped again.
  return value.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
}
|
|
6660
7379
|
/**
 * Restrict `value` to the range bounded by `min` and `max`.
 *
 * @param {number} value - Number to clamp.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound; applied last, so it wins if `min > max`.
 * @returns {number} The clamped number.
 */
function clampNumber(value, min, max) {
  const raisedToMin = Math.max(min, value);
  return Math.min(max, raisedToMin);
}
|
|
6663
|
-
var chartSpecSchema =
|
|
6664
|
-
kind:
|
|
6665
|
-
title:
|
|
6666
|
-
subtitle:
|
|
6667
|
-
xLabel:
|
|
6668
|
-
yLabel:
|
|
6669
|
-
seriesLabel:
|
|
6670
|
-
data:
|
|
6671
|
-
|
|
6672
|
-
label:
|
|
6673
|
-
value:
|
|
7382
|
+
// One chart data point: a non-empty label paired with a finite numeric value.
var chartDataPointSchema = z6.object({
  label: z6.string().min(1),
  value: z6.number().finite()
});
// Validates a chart specification. `kind` is "bar" or "line" (default "bar"),
// `data` must hold between 2 and 12 points, and `notes` allows at most 5 non-empty strings.
var chartSpecSchema = z6.object({
  kind: z6.enum(["bar", "line"]).default("bar"),
  title: z6.string().min(1),
  subtitle: z6.string().optional(),
  xLabel: z6.string().optional(),
  yLabel: z6.string().optional(),
  seriesLabel: z6.string().optional(),
  data: z6.array(chartDataPointSchema).min(2).max(12),
  notes: z6.array(z6.string().min(1)).max(5).optional()
});
|
|
6678
|
-
var sceneSpecSchema =
|
|
6679
|
-
title:
|
|
6680
|
-
alt:
|
|
6681
|
-
background:
|
|
6682
|
-
width:
|
|
6683
|
-
height:
|
|
6684
|
-
elements:
|
|
6685
|
-
|
|
6686
|
-
kind:
|
|
6687
|
-
shape:
|
|
6688
|
-
x:
|
|
6689
|
-
y:
|
|
6690
|
-
width:
|
|
6691
|
-
height:
|
|
6692
|
-
radius:
|
|
6693
|
-
text:
|
|
6694
|
-
fontSize:
|
|
6695
|
-
fill:
|
|
6696
|
-
stroke:
|
|
6697
|
-
strokeWidth:
|
|
6698
|
-
opacity:
|
|
7397
|
+
// One scene element: either a "shape" (optionally rect, circle or line) or a "label".
// Only `kind`, `x` and `y` are required; every numeric field must be finite.
var sceneElementSchema = z6.object({
  kind: z6.enum(["shape", "label"]),
  shape: z6.enum(["rect", "circle", "line"]).optional(),
  x: z6.number().finite(),
  y: z6.number().finite(),
  width: z6.number().finite().optional(),
  height: z6.number().finite().optional(),
  radius: z6.number().finite().optional(),
  text: z6.string().optional(),
  fontSize: z6.number().finite().optional(),
  fill: z6.string().optional(),
  stroke: z6.string().optional(),
  strokeWidth: z6.number().finite().optional(),
  opacity: z6.number().finite().optional()
});
// Validates a scene specification. Optional canvas `width`/`height` are positive
// integers capped at 2400, and `elements` must hold between 1 and 32 entries.
var sceneSpecSchema = z6.object({
  title: z6.string().min(1),
  alt: z6.string().min(1),
  background: z6.string().optional(),
  width: z6.number().int().positive().max(2400).optional(),
  height: z6.number().int().positive().max(2400).optional(),
  elements: z6.array(sceneElementSchema).min(1).max(32)
});
|
|
@@ -6847,14 +7566,14 @@ function buildOutputAssetManifest(input) {
|
|
|
6847
7566
|
}
|
|
6848
7567
|
|
|
6849
7568
|
// src/outputs.ts
|
|
7569
|
+
import fs13 from "fs/promises";
|
|
7570
|
+
import path16 from "path";
|
|
7571
|
+
import matter7 from "gray-matter";
|
|
7572
|
+
|
|
7573
|
+
// src/pages.ts
|
|
6850
7574
|
import fs12 from "fs/promises";
|
|
6851
7575
|
import path15 from "path";
|
|
6852
7576
|
import matter6 from "gray-matter";
|
|
6853
|
-
|
|
6854
|
-
// src/pages.ts
|
|
6855
|
-
import fs11 from "fs/promises";
|
|
6856
|
-
import path14 from "path";
|
|
6857
|
-
import matter5 from "gray-matter";
|
|
6858
7577
|
/**
 * Coerce an arbitrary value into an array of strings.
 *
 * @param {*} value - Candidate value, typically a parsed frontmatter field.
 * @returns {string[]} The string entries of `value` when it is an array,
 *   otherwise an empty array.
 */
function normalizeStringArray(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  const isString = (entry) => typeof entry === "string";
  return value.filter(isString);
}
|
|
@@ -6875,6 +7594,9 @@ function normalizePageStatus(value, fallback = "active") {
|
|
|
6875
7594
|
/**
 * Normalize a page-manager value.
 *
 * @param {*} value - Candidate value.
 * @param {string} [fallback="system"] - Returned when `value` is not recognized.
 * @returns {string} `value` when it is exactly "human" or "system", otherwise `fallback`.
 */
function normalizePageManager(value, fallback = "system") {
  switch (value) {
    case "human":
    case "system":
      return value;
    default:
      return fallback;
  }
}
|
|
7597
|
+
/**
 * Normalize a source-type value.
 *
 * @param {*} value - Candidate value.
 * @returns {string|undefined} `value` when it is exactly one of "arxiv", "doi",
 *   "tweet", "article" or "url"; otherwise `undefined`.
 */
function normalizeSourceType(value) {
  switch (value) {
    case "arxiv":
    case "doi":
    case "tweet":
    case "article":
    case "url":
      return value;
    default:
      return void 0;
  }
}
|
|
6878
7600
|
/**
 * Normalize an output-format value.
 *
 * Only "report", "slides", "chart" and "image" are passed through. Any other
 * value, including the literal "markdown", resolves to `fallback`.
 *
 * @param {*} value - Candidate value.
 * @param {string} [fallback="markdown"] - Returned when `value` is not one of the four formats above.
 * @returns {string} The recognized format or `fallback`.
 */
function normalizeOutputFormat(value, fallback = "markdown") {
  switch (value) {
    case "report":
    case "slides":
    case "chart":
    case "image":
      return value;
    default:
      return fallback;
  }
}
|
|
@@ -6925,8 +7647,8 @@ async function loadExistingManagedPageState(absolutePath, defaults = {}) {
|
|
|
6925
7647
|
updatedAt: updatedFallback
|
|
6926
7648
|
};
|
|
6927
7649
|
}
|
|
6928
|
-
const content = await
|
|
6929
|
-
const parsed =
|
|
7650
|
+
const content = await fs12.readFile(absolutePath, "utf8");
|
|
7651
|
+
const parsed = matter6(content);
|
|
6930
7652
|
return {
|
|
6931
7653
|
status: normalizePageStatus(parsed.data.status, defaults.status ?? "active"),
|
|
6932
7654
|
managedBy: normalizePageManager(parsed.data.managed_by, defaults.managedBy ?? "system"),
|
|
@@ -6960,11 +7682,11 @@ function inferPageKind(relativePath, explicitKind = void 0) {
|
|
|
6960
7682
|
return "index";
|
|
6961
7683
|
}
|
|
6962
7684
|
function parseStoredPage(relativePath, content, defaults = {}) {
|
|
6963
|
-
const parsed =
|
|
7685
|
+
const parsed = matter6(content);
|
|
6964
7686
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
6965
7687
|
const fallbackCreatedAt = defaults.createdAt ?? now;
|
|
6966
7688
|
const fallbackUpdatedAt = defaults.updatedAt ?? fallbackCreatedAt;
|
|
6967
|
-
const title = typeof parsed.data.title === "string" ? parsed.data.title :
|
|
7689
|
+
const title = typeof parsed.data.title === "string" ? parsed.data.title : path15.basename(relativePath, ".md");
|
|
6968
7690
|
const kind = inferPageKind(relativePath, parsed.data.kind);
|
|
6969
7691
|
const sourceIds = normalizeStringArray(parsed.data.source_ids);
|
|
6970
7692
|
const projectIds = normalizeProjectIds(parsed.data.project_ids);
|
|
@@ -6980,6 +7702,7 @@ function parseStoredPage(relativePath, content, defaults = {}) {
|
|
|
6980
7702
|
path: relativePath,
|
|
6981
7703
|
title,
|
|
6982
7704
|
kind,
|
|
7705
|
+
sourceType: normalizeSourceType(parsed.data.source_type),
|
|
6983
7706
|
sourceIds,
|
|
6984
7707
|
projectIds,
|
|
6985
7708
|
nodeIds,
|
|
@@ -7003,18 +7726,18 @@ function parseStoredPage(relativePath, content, defaults = {}) {
|
|
|
7003
7726
|
};
|
|
7004
7727
|
}
|
|
7005
7728
|
async function loadInsightPages(wikiDir) {
|
|
7006
|
-
const insightsDir =
|
|
7729
|
+
const insightsDir = path15.join(wikiDir, "insights");
|
|
7007
7730
|
if (!await fileExists(insightsDir)) {
|
|
7008
7731
|
return [];
|
|
7009
7732
|
}
|
|
7010
|
-
const files = (await listFilesRecursive(insightsDir)).filter((filePath) => filePath.endsWith(".md")).filter((filePath) =>
|
|
7733
|
+
const files = (await listFilesRecursive(insightsDir)).filter((filePath) => filePath.endsWith(".md")).filter((filePath) => path15.basename(filePath) !== "index.md").sort((left, right) => left.localeCompare(right));
|
|
7011
7734
|
const insights = [];
|
|
7012
7735
|
for (const absolutePath of files) {
|
|
7013
|
-
const relativePath = toPosix(
|
|
7014
|
-
const content = await
|
|
7015
|
-
const parsed =
|
|
7016
|
-
const stats = await
|
|
7017
|
-
const title = typeof parsed.data.title === "string" ? parsed.data.title :
|
|
7736
|
+
const relativePath = toPosix(path15.relative(wikiDir, absolutePath));
|
|
7737
|
+
const content = await fs12.readFile(absolutePath, "utf8");
|
|
7738
|
+
const parsed = matter6(content);
|
|
7739
|
+
const stats = await fs12.stat(absolutePath);
|
|
7740
|
+
const title = typeof parsed.data.title === "string" ? parsed.data.title : path15.basename(absolutePath, ".md");
|
|
7018
7741
|
const sourceIds = normalizeStringArray(parsed.data.source_ids);
|
|
7019
7742
|
const projectIds = normalizeProjectIds(parsed.data.project_ids);
|
|
7020
7743
|
const nodeIds = normalizeStringArray(parsed.data.node_ids);
|
|
@@ -7076,28 +7799,28 @@ function relatedOutputsForPage(targetPage, outputPages) {
|
|
|
7076
7799
|
return outputPages.map((page) => ({ page, rank: relationRank(page, targetPage) })).filter((item) => item.rank > 0).sort((left, right) => right.rank - left.rank || left.page.title.localeCompare(right.page.title)).map((item) => item.page);
|
|
7077
7800
|
}
|
|
7078
7801
|
async function resolveUniqueOutputSlug(wikiDir, baseSlug) {
|
|
7079
|
-
const outputsDir =
|
|
7802
|
+
const outputsDir = path16.join(wikiDir, "outputs");
|
|
7080
7803
|
const root = baseSlug || "output";
|
|
7081
7804
|
let candidate = root;
|
|
7082
7805
|
let counter = 2;
|
|
7083
|
-
while (await fileExists(
|
|
7806
|
+
while (await fileExists(path16.join(outputsDir, `${candidate}.md`))) {
|
|
7084
7807
|
candidate = `${root}-${counter}`;
|
|
7085
7808
|
counter++;
|
|
7086
7809
|
}
|
|
7087
7810
|
return candidate;
|
|
7088
7811
|
}
|
|
7089
7812
|
async function loadSavedOutputPages(wikiDir) {
|
|
7090
|
-
const outputsDir =
|
|
7091
|
-
const entries = await
|
|
7813
|
+
const outputsDir = path16.join(wikiDir, "outputs");
|
|
7814
|
+
const entries = await fs13.readdir(outputsDir, { withFileTypes: true }).catch(() => []);
|
|
7092
7815
|
const outputs = [];
|
|
7093
7816
|
for (const entry of entries) {
|
|
7094
7817
|
if (!entry.isFile() || !entry.name.endsWith(".md") || entry.name === "index.md") {
|
|
7095
7818
|
continue;
|
|
7096
7819
|
}
|
|
7097
|
-
const relativePath =
|
|
7098
|
-
const absolutePath =
|
|
7099
|
-
const content = await
|
|
7100
|
-
const parsed =
|
|
7820
|
+
const relativePath = path16.posix.join("outputs", entry.name);
|
|
7821
|
+
const absolutePath = path16.join(outputsDir, entry.name);
|
|
7822
|
+
const content = await fs13.readFile(absolutePath, "utf8");
|
|
7823
|
+
const parsed = matter7(content);
|
|
7101
7824
|
const slug = entry.name.replace(/\.md$/, "");
|
|
7102
7825
|
const title = typeof parsed.data.title === "string" ? parsed.data.title : slug;
|
|
7103
7826
|
const pageId = typeof parsed.data.page_id === "string" ? parsed.data.page_id : `output:${slug}`;
|
|
@@ -7109,7 +7832,7 @@ async function loadSavedOutputPages(wikiDir) {
|
|
|
7109
7832
|
const relatedSourceIds = normalizeStringArray(parsed.data.related_source_ids);
|
|
7110
7833
|
const backlinks = normalizeStringArray(parsed.data.backlinks);
|
|
7111
7834
|
const compiledFrom = normalizeStringArray(parsed.data.compiled_from);
|
|
7112
|
-
const stats = await
|
|
7835
|
+
const stats = await fs13.stat(absolutePath);
|
|
7113
7836
|
const createdAt = typeof parsed.data.created_at === "string" ? parsed.data.created_at : stats.birthtimeMs > 0 ? stats.birthtime.toISOString() : stats.mtime.toISOString();
|
|
7114
7837
|
const updatedAt = typeof parsed.data.updated_at === "string" ? parsed.data.updated_at : stats.mtime.toISOString();
|
|
7115
7838
|
outputs.push({
|
|
@@ -7147,9 +7870,9 @@ async function loadSavedOutputPages(wikiDir) {
|
|
|
7147
7870
|
}
|
|
7148
7871
|
|
|
7149
7872
|
// src/search.ts
|
|
7150
|
-
import
|
|
7151
|
-
import
|
|
7152
|
-
import
|
|
7873
|
+
import fs14 from "fs/promises";
|
|
7874
|
+
import path17 from "path";
|
|
7875
|
+
import matter8 from "gray-matter";
|
|
7153
7876
|
function getDatabaseSync() {
|
|
7154
7877
|
const builtin = process.getBuiltinModule?.("node:sqlite");
|
|
7155
7878
|
if (!builtin?.DatabaseSync) {
|
|
@@ -7167,8 +7890,11 @@ function normalizeKind(value) {
|
|
|
7167
7890
|
function normalizeStatus(value) {
|
|
7168
7891
|
return value === "draft" || value === "candidate" || value === "active" || value === "archived" ? value : void 0;
|
|
7169
7892
|
}
|
|
7893
|
+
function normalizeSourceType2(value) {
|
|
7894
|
+
return value === "arxiv" || value === "doi" || value === "tweet" || value === "article" || value === "url" ? value : void 0;
|
|
7895
|
+
}
|
|
7170
7896
|
async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
7171
|
-
await ensureDir(
|
|
7897
|
+
await ensureDir(path17.dirname(dbPath));
|
|
7172
7898
|
const DatabaseSync = getDatabaseSync();
|
|
7173
7899
|
const db = new DatabaseSync(dbPath);
|
|
7174
7900
|
db.exec("PRAGMA journal_mode = WAL;");
|
|
@@ -7182,6 +7908,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
7182
7908
|
body TEXT NOT NULL,
|
|
7183
7909
|
kind TEXT NOT NULL,
|
|
7184
7910
|
status TEXT NOT NULL,
|
|
7911
|
+
source_type TEXT NOT NULL,
|
|
7185
7912
|
project_ids TEXT NOT NULL,
|
|
7186
7913
|
project_key TEXT NOT NULL
|
|
7187
7914
|
);
|
|
@@ -7195,12 +7922,12 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
7195
7922
|
DELETE FROM pages;
|
|
7196
7923
|
`);
|
|
7197
7924
|
const insertPage = db.prepare(
|
|
7198
|
-
"INSERT INTO pages (id, path, title, body, kind, status, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
|
|
7925
|
+
"INSERT INTO pages (id, path, title, body, kind, status, source_type, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
|
7199
7926
|
);
|
|
7200
7927
|
for (const page of pages) {
|
|
7201
|
-
const absolutePath =
|
|
7202
|
-
const content = await
|
|
7203
|
-
const parsed =
|
|
7928
|
+
const absolutePath = path17.join(wikiDir, page.path);
|
|
7929
|
+
const content = await fs14.readFile(absolutePath, "utf8");
|
|
7930
|
+
const parsed = matter8(content);
|
|
7204
7931
|
insertPage.run(
|
|
7205
7932
|
page.id,
|
|
7206
7933
|
page.path,
|
|
@@ -7208,6 +7935,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
7208
7935
|
parsed.content,
|
|
7209
7936
|
page.kind,
|
|
7210
7937
|
page.status,
|
|
7938
|
+
typeof parsed.data.source_type === "string" ? parsed.data.source_type : "",
|
|
7211
7939
|
JSON.stringify(page.projectIds),
|
|
7212
7940
|
page.projectIds.map((projectId) => `|${projectId}|`).join("")
|
|
7213
7941
|
);
|
|
@@ -7241,6 +7969,10 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
|
|
|
7241
7969
|
params.push(`%|${options.project}|%`);
|
|
7242
7970
|
}
|
|
7243
7971
|
}
|
|
7972
|
+
if (options.sourceType && options.sourceType !== "all") {
|
|
7973
|
+
clauses.push("pages.source_type = ?");
|
|
7974
|
+
params.push(options.sourceType);
|
|
7975
|
+
}
|
|
7244
7976
|
const statement = db.prepare(`
|
|
7245
7977
|
SELECT
|
|
7246
7978
|
pages.id AS pageId,
|
|
@@ -7248,6 +7980,7 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
|
|
|
7248
7980
|
pages.title AS title,
|
|
7249
7981
|
pages.kind AS kind,
|
|
7250
7982
|
pages.status AS status,
|
|
7983
|
+
pages.source_type AS sourceType,
|
|
7251
7984
|
pages.project_ids AS projectIds,
|
|
7252
7985
|
snippet(page_search, 1, '[', ']', '...', 16) AS snippet,
|
|
7253
7986
|
bm25(page_search) AS rank
|
|
@@ -7275,13 +8008,14 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
|
|
|
7275
8008
|
title: String(row.title ?? ""),
|
|
7276
8009
|
kind: normalizeKind(row.kind),
|
|
7277
8010
|
status: normalizeStatus(row.status),
|
|
8011
|
+
sourceType: normalizeSourceType2(row.sourceType),
|
|
7278
8012
|
snippet: String(row.snippet ?? ""),
|
|
7279
8013
|
rank: Number(row.rank ?? 0)
|
|
7280
8014
|
}));
|
|
7281
8015
|
}
|
|
7282
8016
|
|
|
7283
8017
|
// src/vault.ts
|
|
7284
|
-
function
|
|
8018
|
+
function uniqueStrings3(values) {
|
|
7285
8019
|
return uniqueBy(values.filter(Boolean), (value) => value);
|
|
7286
8020
|
}
|
|
7287
8021
|
function normalizeOutputFormat2(format) {
|
|
@@ -7302,7 +8036,7 @@ function outputFormatInstruction(format) {
|
|
|
7302
8036
|
}
|
|
7303
8037
|
}
|
|
7304
8038
|
function outputAssetPath(slug, fileName) {
|
|
7305
|
-
return toPosix(
|
|
8039
|
+
return toPosix(path18.join("outputs", "assets", slug, fileName));
|
|
7306
8040
|
}
|
|
7307
8041
|
function outputAssetId(slug, role) {
|
|
7308
8042
|
return `output:${slug}:asset:${role}`;
|
|
@@ -7442,7 +8176,7 @@ async function resolveImageGenerationProvider(rootDir) {
|
|
|
7442
8176
|
if (!providerConfig) {
|
|
7443
8177
|
throw new Error(`No provider configured with id "${preferredProviderId}" for task "imageProvider".`);
|
|
7444
8178
|
}
|
|
7445
|
-
const { createProvider: createProvider2 } = await import("./registry-
|
|
8179
|
+
const { createProvider: createProvider2 } = await import("./registry-6KZMA3XM.js");
|
|
7446
8180
|
return createProvider2(preferredProviderId, providerConfig, rootDir);
|
|
7447
8181
|
}
|
|
7448
8182
|
async function generateOutputArtifacts(rootDir, input) {
|
|
@@ -7640,13 +8374,13 @@ async function generateOutputArtifacts(rootDir, input) {
|
|
|
7640
8374
|
};
|
|
7641
8375
|
}
|
|
7642
8376
|
function normalizeProjectRoot(root) {
|
|
7643
|
-
const normalized = toPosix(
|
|
8377
|
+
const normalized = toPosix(path18.posix.normalize(root.replace(/\\/g, "/"))).replace(/^\.\/+/, "").replace(/\/+$/, "");
|
|
7644
8378
|
return normalized;
|
|
7645
8379
|
}
|
|
7646
8380
|
function projectEntries(config) {
|
|
7647
8381
|
return Object.entries(config.projects ?? {}).map(([id, project]) => ({
|
|
7648
8382
|
id,
|
|
7649
|
-
roots:
|
|
8383
|
+
roots: uniqueStrings3(project.roots.map(normalizeProjectRoot)).filter(Boolean),
|
|
7650
8384
|
schemaPath: project.schemaPath
|
|
7651
8385
|
})).sort((left, right) => left.id.localeCompare(right.id));
|
|
7652
8386
|
}
|
|
@@ -7666,10 +8400,10 @@ function manifestPathForProject(rootDir, manifest) {
|
|
|
7666
8400
|
if (!rawPath) {
|
|
7667
8401
|
return toPosix(manifest.storedPath);
|
|
7668
8402
|
}
|
|
7669
|
-
if (!
|
|
8403
|
+
if (!path18.isAbsolute(rawPath)) {
|
|
7670
8404
|
return normalizeProjectRoot(rawPath);
|
|
7671
8405
|
}
|
|
7672
|
-
const relative = toPosix(
|
|
8406
|
+
const relative = toPosix(path18.relative(rootDir, rawPath));
|
|
7673
8407
|
return relative.startsWith("..") ? toPosix(rawPath) : normalizeProjectRoot(relative);
|
|
7674
8408
|
}
|
|
7675
8409
|
function prefixMatches(value, prefix) {
|
|
@@ -7694,11 +8428,11 @@ function resolveSourceProjects(rootDir, manifests, config) {
|
|
|
7694
8428
|
return Object.fromEntries(manifests.map((manifest) => [manifest.sourceId, resolveSourceProjectId(rootDir, manifest, config)]));
|
|
7695
8429
|
}
|
|
7696
8430
|
function scopedProjectIdsFromSources(sourceIds, sourceProjects) {
|
|
7697
|
-
const projectIds =
|
|
8431
|
+
const projectIds = uniqueStrings3(sourceIds.map((sourceId) => sourceProjects[sourceId] ?? "").filter(Boolean));
|
|
7698
8432
|
return projectIds.length === 1 ? projectIds : [];
|
|
7699
8433
|
}
|
|
7700
8434
|
function schemaProjectIdsFromPages(pageIds, pageMap2) {
|
|
7701
|
-
return
|
|
8435
|
+
return uniqueStrings3(
|
|
7702
8436
|
pageIds.flatMap((pageId) => pageMap2.get(pageId)?.projectIds ?? []).filter(Boolean).sort((left, right) => left.localeCompare(right))
|
|
7703
8437
|
);
|
|
7704
8438
|
}
|
|
@@ -7707,7 +8441,7 @@ function categoryTagsForSchema(schema, texts) {
|
|
|
7707
8441
|
if (!haystack) {
|
|
7708
8442
|
return [];
|
|
7709
8443
|
}
|
|
7710
|
-
return
|
|
8444
|
+
return uniqueStrings3(
|
|
7711
8445
|
schemaCategoryLabels({ path: "", hash: "", content: schema.content }).filter((label) => haystack.includes(label.toLowerCase())).map((label) => `category/${slugify(label)}`)
|
|
7712
8446
|
).slice(0, 3);
|
|
7713
8447
|
}
|
|
@@ -7843,7 +8577,7 @@ function pageHashes(pages) {
|
|
|
7843
8577
|
return Object.fromEntries(pages.map((page) => [page.page.id, page.contentHash]));
|
|
7844
8578
|
}
|
|
7845
8579
|
async function buildManagedGraphPage(absolutePath, defaults, build) {
|
|
7846
|
-
const existingContent = await fileExists(absolutePath) ? await
|
|
8580
|
+
const existingContent = await fileExists(absolutePath) ? await fs15.readFile(absolutePath, "utf8") : null;
|
|
7847
8581
|
let existing = await loadExistingManagedPageState(absolutePath, {
|
|
7848
8582
|
status: defaults.status ?? "active",
|
|
7849
8583
|
managedBy: defaults.managedBy
|
|
@@ -7881,7 +8615,7 @@ async function buildManagedGraphPage(absolutePath, defaults, build) {
|
|
|
7881
8615
|
return built;
|
|
7882
8616
|
}
|
|
7883
8617
|
async function buildManagedContent(absolutePath, defaults, build) {
|
|
7884
|
-
const existingContent = await fileExists(absolutePath) ? await
|
|
8618
|
+
const existingContent = await fileExists(absolutePath) ? await fs15.readFile(absolutePath, "utf8") : null;
|
|
7885
8619
|
let existing = await loadExistingManagedPageState(absolutePath, {
|
|
7886
8620
|
status: defaults.status ?? "active",
|
|
7887
8621
|
managedBy: defaults.managedBy
|
|
@@ -7918,7 +8652,7 @@ async function buildManagedContent(absolutePath, defaults, build) {
|
|
|
7918
8652
|
return content;
|
|
7919
8653
|
}
|
|
7920
8654
|
function indexCompiledFrom(pages) {
|
|
7921
|
-
return
|
|
8655
|
+
return uniqueStrings3(pages.flatMap((page) => page.sourceIds));
|
|
7922
8656
|
}
|
|
7923
8657
|
function deriveGraphMetrics(nodes, edges) {
|
|
7924
8658
|
const adjacency = /* @__PURE__ */ new Map();
|
|
@@ -8321,17 +9055,29 @@ function buildGraph(manifests, analyses, pages, sourceProjects, _codeIndex) {
|
|
|
8321
9055
|
pages
|
|
8322
9056
|
};
|
|
8323
9057
|
}
|
|
8324
|
-
|
|
9058
|
+
function recentResearchSourcePages(graph, previousCompiledAt) {
|
|
9059
|
+
const previousTimestamp = previousCompiledAt ? Date.parse(previousCompiledAt) : Number.NaN;
|
|
9060
|
+
return graph.pages.filter(
|
|
9061
|
+
(page) => page.kind === "source" && Boolean(page.sourceType) && page.sourceType !== "url"
|
|
9062
|
+
).filter((page) => Number.isNaN(previousTimestamp) || Date.parse(page.updatedAt) > previousTimestamp).sort((left, right) => right.updatedAt.localeCompare(left.updatedAt) || left.title.localeCompare(right.title)).slice(0, 8).map((page) => ({
|
|
9063
|
+
id: page.id,
|
|
9064
|
+
path: page.path,
|
|
9065
|
+
title: page.title,
|
|
9066
|
+
updatedAt: page.updatedAt,
|
|
9067
|
+
sourceType: page.sourceType
|
|
9068
|
+
}));
|
|
9069
|
+
}
|
|
9070
|
+
async function buildGraphOrientationPages(graph, paths, schemaHash, previousCompiledAt) {
|
|
8325
9071
|
const benchmark = await readJsonFile(paths.benchmarkPath);
|
|
8326
9072
|
const communityRecords = [];
|
|
8327
9073
|
for (const community of graph.communities ?? []) {
|
|
8328
|
-
const absolutePath =
|
|
9074
|
+
const absolutePath = path18.join(paths.wikiDir, "graph", "communities", `${community.id.replace(/^community:/, "")}.md`);
|
|
8329
9075
|
communityRecords.push(
|
|
8330
9076
|
await buildManagedGraphPage(
|
|
8331
9077
|
absolutePath,
|
|
8332
9078
|
{
|
|
8333
9079
|
managedBy: "system",
|
|
8334
|
-
compiledFrom:
|
|
9080
|
+
compiledFrom: uniqueStrings3(
|
|
8335
9081
|
community.nodeIds.flatMap((nodeId) => graph.nodes.find((node) => node.id === nodeId)?.sourceIds ?? [])
|
|
8336
9082
|
),
|
|
8337
9083
|
confidence: 1
|
|
@@ -8345,26 +9091,36 @@ async function buildGraphOrientationPages(graph, paths, schemaHash) {
|
|
|
8345
9091
|
)
|
|
8346
9092
|
);
|
|
8347
9093
|
}
|
|
8348
|
-
const
|
|
9094
|
+
const report = buildGraphReportArtifact({
|
|
9095
|
+
graph,
|
|
9096
|
+
communityPages: communityRecords.map((record) => record.page),
|
|
9097
|
+
benchmark,
|
|
9098
|
+
benchmarkStale: benchmark ? benchmark.graphHash !== graphHash(graph) : false,
|
|
9099
|
+
recentResearchSources: recentResearchSourcePages(graph, previousCompiledAt),
|
|
9100
|
+
graphHash: graphHash(graph)
|
|
9101
|
+
});
|
|
9102
|
+
const reportAbsolutePath = path18.join(paths.wikiDir, "graph", "report.md");
|
|
8349
9103
|
const reportRecord = await buildManagedGraphPage(
|
|
8350
9104
|
reportAbsolutePath,
|
|
8351
9105
|
{
|
|
8352
9106
|
managedBy: "system",
|
|
8353
|
-
compiledFrom:
|
|
9107
|
+
compiledFrom: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
|
|
8354
9108
|
confidence: 1
|
|
8355
9109
|
},
|
|
8356
9110
|
(metadata) => buildGraphReportPage({
|
|
8357
9111
|
graph,
|
|
8358
9112
|
schemaHash,
|
|
8359
9113
|
metadata,
|
|
8360
|
-
|
|
8361
|
-
benchmark
|
|
9114
|
+
report
|
|
8362
9115
|
})
|
|
8363
9116
|
);
|
|
8364
|
-
return
|
|
9117
|
+
return {
|
|
9118
|
+
records: [reportRecord, ...communityRecords],
|
|
9119
|
+
report
|
|
9120
|
+
};
|
|
8365
9121
|
}
|
|
8366
9122
|
async function writePage(wikiDir, relativePath, content, changedPages) {
|
|
8367
|
-
const absolutePath =
|
|
9123
|
+
const absolutePath = path18.resolve(wikiDir, relativePath);
|
|
8368
9124
|
const changed = await writeFileIfChanged(absolutePath, content);
|
|
8369
9125
|
if (changed) {
|
|
8370
9126
|
changedPages.push(relativePath);
|
|
@@ -8426,29 +9182,29 @@ async function requiredCompileArtifactsExist(paths) {
|
|
|
8426
9182
|
paths.graphPath,
|
|
8427
9183
|
paths.codeIndexPath,
|
|
8428
9184
|
paths.searchDbPath,
|
|
8429
|
-
|
|
8430
|
-
|
|
8431
|
-
|
|
8432
|
-
|
|
8433
|
-
|
|
8434
|
-
|
|
8435
|
-
|
|
8436
|
-
|
|
9185
|
+
path18.join(paths.wikiDir, "index.md"),
|
|
9186
|
+
path18.join(paths.wikiDir, "sources", "index.md"),
|
|
9187
|
+
path18.join(paths.wikiDir, "code", "index.md"),
|
|
9188
|
+
path18.join(paths.wikiDir, "concepts", "index.md"),
|
|
9189
|
+
path18.join(paths.wikiDir, "entities", "index.md"),
|
|
9190
|
+
path18.join(paths.wikiDir, "outputs", "index.md"),
|
|
9191
|
+
path18.join(paths.wikiDir, "projects", "index.md"),
|
|
9192
|
+
path18.join(paths.wikiDir, "candidates", "index.md")
|
|
8437
9193
|
];
|
|
8438
9194
|
const checks = await Promise.all(requiredPaths.map((filePath) => fileExists(filePath)));
|
|
8439
9195
|
return checks.every(Boolean);
|
|
8440
9196
|
}
|
|
8441
9197
|
async function loadAvailableCachedAnalyses(paths, manifests) {
|
|
8442
9198
|
const analyses = await Promise.all(
|
|
8443
|
-
manifests.map(async (manifest) => readJsonFile(
|
|
9199
|
+
manifests.map(async (manifest) => readJsonFile(path18.join(paths.analysesDir, `${manifest.sourceId}.json`)))
|
|
8444
9200
|
);
|
|
8445
9201
|
return analyses.filter((analysis) => Boolean(analysis));
|
|
8446
9202
|
}
|
|
8447
9203
|
function approvalManifestPath(paths, approvalId) {
|
|
8448
|
-
return
|
|
9204
|
+
return path18.join(paths.approvalsDir, approvalId, "manifest.json");
|
|
8449
9205
|
}
|
|
8450
9206
|
function approvalGraphPath(paths, approvalId) {
|
|
8451
|
-
return
|
|
9207
|
+
return path18.join(paths.approvalsDir, approvalId, "state", "graph.json");
|
|
8452
9208
|
}
|
|
8453
9209
|
async function readApprovalManifest(paths, approvalId) {
|
|
8454
9210
|
const manifest = await readJsonFile(approvalManifestPath(paths, approvalId));
|
|
@@ -8458,7 +9214,7 @@ async function readApprovalManifest(paths, approvalId) {
|
|
|
8458
9214
|
return manifest;
|
|
8459
9215
|
}
|
|
8460
9216
|
async function writeApprovalManifest(paths, manifest) {
|
|
8461
|
-
await
|
|
9217
|
+
await fs15.writeFile(approvalManifestPath(paths, manifest.approvalId), `${JSON.stringify(manifest, null, 2)}
|
|
8462
9218
|
`, "utf8");
|
|
8463
9219
|
}
|
|
8464
9220
|
async function buildApprovalEntries(paths, changedFiles, deletedPaths, previousGraph, graph) {
|
|
@@ -8473,7 +9229,7 @@ async function buildApprovalEntries(paths, changedFiles, deletedPaths, previousG
|
|
|
8473
9229
|
continue;
|
|
8474
9230
|
}
|
|
8475
9231
|
const previousPage = previousPagesById.get(nextPage.id);
|
|
8476
|
-
const currentExists = await fileExists(
|
|
9232
|
+
const currentExists = await fileExists(path18.join(paths.wikiDir, file.relativePath));
|
|
8477
9233
|
if (previousPage && previousPage.path !== nextPage.path) {
|
|
8478
9234
|
entries.push({
|
|
8479
9235
|
pageId: nextPage.id,
|
|
@@ -8506,7 +9262,7 @@ async function buildApprovalEntries(paths, changedFiles, deletedPaths, previousG
|
|
|
8506
9262
|
const previousPage = previousPagesByPath.get(deletedPath);
|
|
8507
9263
|
entries.push({
|
|
8508
9264
|
pageId: previousPage?.id ?? `page:${slugify(deletedPath)}`,
|
|
8509
|
-
title: previousPage?.title ??
|
|
9265
|
+
title: previousPage?.title ?? path18.basename(deletedPath, ".md"),
|
|
8510
9266
|
kind: previousPage?.kind ?? "index",
|
|
8511
9267
|
changeType: "delete",
|
|
8512
9268
|
status: "pending",
|
|
@@ -8518,16 +9274,16 @@ async function buildApprovalEntries(paths, changedFiles, deletedPaths, previousG
|
|
|
8518
9274
|
}
|
|
8519
9275
|
async function stageApprovalBundle(paths, changedFiles, deletedPaths, previousGraph, graph) {
|
|
8520
9276
|
const approvalId = `compile-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`;
|
|
8521
|
-
const approvalDir =
|
|
9277
|
+
const approvalDir = path18.join(paths.approvalsDir, approvalId);
|
|
8522
9278
|
await ensureDir(approvalDir);
|
|
8523
|
-
await ensureDir(
|
|
8524
|
-
await ensureDir(
|
|
9279
|
+
await ensureDir(path18.join(approvalDir, "wiki"));
|
|
9280
|
+
await ensureDir(path18.join(approvalDir, "state"));
|
|
8525
9281
|
for (const file of changedFiles) {
|
|
8526
|
-
const targetPath =
|
|
8527
|
-
await ensureDir(
|
|
8528
|
-
await
|
|
9282
|
+
const targetPath = path18.join(approvalDir, "wiki", file.relativePath);
|
|
9283
|
+
await ensureDir(path18.dirname(targetPath));
|
|
9284
|
+
await fs15.writeFile(targetPath, file.content, "utf8");
|
|
8529
9285
|
}
|
|
8530
|
-
await
|
|
9286
|
+
await fs15.writeFile(path18.join(approvalDir, "state", "graph.json"), JSON.stringify(graph, null, 2), "utf8");
|
|
8531
9287
|
await writeApprovalManifest(paths, {
|
|
8532
9288
|
approvalId,
|
|
8533
9289
|
createdAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -8587,7 +9343,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8587
9343
|
confidence: 1
|
|
8588
9344
|
});
|
|
8589
9345
|
const sourceRecord = await buildManagedGraphPage(
|
|
8590
|
-
|
|
9346
|
+
path18.join(paths.wikiDir, preview.path),
|
|
8591
9347
|
{
|
|
8592
9348
|
managedBy: "system",
|
|
8593
9349
|
confidence: 1,
|
|
@@ -8632,7 +9388,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8632
9388
|
);
|
|
8633
9389
|
records.push(
|
|
8634
9390
|
await buildManagedGraphPage(
|
|
8635
|
-
|
|
9391
|
+
path18.join(paths.wikiDir, modulePreview.path),
|
|
8636
9392
|
{
|
|
8637
9393
|
managedBy: "system",
|
|
8638
9394
|
confidence: 1,
|
|
@@ -8658,15 +9414,15 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8658
9414
|
const itemKind = kind === "concepts" ? "concept" : "entity";
|
|
8659
9415
|
const slug = slugify(aggregate.name);
|
|
8660
9416
|
const pageId = `${itemKind}:${slug}`;
|
|
8661
|
-
const sourceIds =
|
|
9417
|
+
const sourceIds = uniqueStrings3(aggregate.sourceAnalyses.map((item) => item.sourceId));
|
|
8662
9418
|
const projectIds = scopedProjectIdsFromSources(sourceIds, input.sourceProjects);
|
|
8663
9419
|
const schemaHash = effectiveHashForProject(input.schemas, projectIds[0] ?? null);
|
|
8664
9420
|
const previousEntry = input.previousState?.candidateHistory?.[pageId];
|
|
8665
9421
|
const promoted = previousEntry?.status === "active" || promoteCandidates && shouldPromoteCandidate(previousEntry, sourceIds);
|
|
8666
9422
|
const relativePath = promoted ? activeAggregatePath(itemKind, slug) : candidatePagePathFor(itemKind, slug);
|
|
8667
9423
|
const fallbackPaths = [
|
|
8668
|
-
|
|
8669
|
-
|
|
9424
|
+
path18.join(paths.wikiDir, activeAggregatePath(itemKind, slug)),
|
|
9425
|
+
path18.join(paths.wikiDir, candidatePagePathFor(itemKind, slug))
|
|
8670
9426
|
];
|
|
8671
9427
|
const confidence = nodeConfidence(aggregate.sourceAnalyses.length);
|
|
8672
9428
|
const preview = emptyGraphPage({
|
|
@@ -8683,7 +9439,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8683
9439
|
status: promoted ? "active" : "candidate"
|
|
8684
9440
|
});
|
|
8685
9441
|
const pageRecord = await buildManagedGraphPage(
|
|
8686
|
-
|
|
9442
|
+
path18.join(paths.wikiDir, relativePath),
|
|
8687
9443
|
{
|
|
8688
9444
|
status: promoted ? "active" : "candidate",
|
|
8689
9445
|
managedBy: "system",
|
|
@@ -8724,9 +9480,9 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8724
9480
|
const compiledPages = records.map((record) => record.page);
|
|
8725
9481
|
const basePages = [...compiledPages, ...input.outputPages, ...input.insightPages];
|
|
8726
9482
|
const baseGraph = buildGraph(input.manifests, input.analyses, basePages, input.sourceProjects, input.codeIndex);
|
|
8727
|
-
const
|
|
8728
|
-
records.push(...
|
|
8729
|
-
const allPages = [...basePages, ...
|
|
9483
|
+
const graphOrientation = await buildGraphOrientationPages(baseGraph, paths, globalSchemaHash, input.previousState?.generatedAt);
|
|
9484
|
+
records.push(...graphOrientation.records);
|
|
9485
|
+
const allPages = [...basePages, ...graphOrientation.records.map((record) => record.page)];
|
|
8730
9486
|
const graph = {
|
|
8731
9487
|
...baseGraph,
|
|
8732
9488
|
pages: allPages
|
|
@@ -8764,7 +9520,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8764
9520
|
confidence: 1
|
|
8765
9521
|
}),
|
|
8766
9522
|
content: await buildManagedContent(
|
|
8767
|
-
|
|
9523
|
+
path18.join(paths.wikiDir, "projects", "index.md"),
|
|
8768
9524
|
{
|
|
8769
9525
|
managedBy: "system",
|
|
8770
9526
|
compiledFrom: indexCompiledFrom(projectIndexRefs)
|
|
@@ -8788,7 +9544,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8788
9544
|
records.push({
|
|
8789
9545
|
page: projectIndexRef,
|
|
8790
9546
|
content: await buildManagedContent(
|
|
8791
|
-
|
|
9547
|
+
path18.join(paths.wikiDir, projectIndexRef.path),
|
|
8792
9548
|
{
|
|
8793
9549
|
managedBy: "system",
|
|
8794
9550
|
compiledFrom: indexCompiledFrom(Object.values(sections).flat())
|
|
@@ -8816,7 +9572,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8816
9572
|
confidence: 1
|
|
8817
9573
|
}),
|
|
8818
9574
|
content: await buildManagedContent(
|
|
8819
|
-
|
|
9575
|
+
path18.join(paths.wikiDir, "index.md"),
|
|
8820
9576
|
{
|
|
8821
9577
|
managedBy: "system",
|
|
8822
9578
|
compiledFrom: indexCompiledFrom(allPages)
|
|
@@ -8847,7 +9603,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8847
9603
|
confidence: 1
|
|
8848
9604
|
}),
|
|
8849
9605
|
content: await buildManagedContent(
|
|
8850
|
-
|
|
9606
|
+
path18.join(paths.wikiDir, relativePath),
|
|
8851
9607
|
{
|
|
8852
9608
|
managedBy: "system",
|
|
8853
9609
|
compiledFrom: indexCompiledFrom(pages)
|
|
@@ -8858,12 +9614,12 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8858
9614
|
}
|
|
8859
9615
|
const nextPagePaths = new Set(records.map((record) => record.page.path));
|
|
8860
9616
|
const obsoleteGraphPaths = (previousGraph?.pages ?? []).filter((page) => page.kind !== "output" && page.kind !== "insight").map((page) => page.path).filter((relativePath) => !nextPagePaths.has(relativePath));
|
|
8861
|
-
const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(
|
|
8862
|
-
const obsoletePaths =
|
|
9617
|
+
const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath))).filter((relativePath) => !nextPagePaths.has(relativePath));
|
|
9618
|
+
const obsoletePaths = uniqueStrings3([...obsoleteGraphPaths, ...existingProjectIndexPaths]);
|
|
8863
9619
|
const changedFiles = [];
|
|
8864
9620
|
for (const record of records) {
|
|
8865
|
-
const absolutePath =
|
|
8866
|
-
const current = await fileExists(absolutePath) ? await
|
|
9621
|
+
const absolutePath = path18.join(paths.wikiDir, record.page.path);
|
|
9622
|
+
const current = await fileExists(absolutePath) ? await fs15.readFile(absolutePath, "utf8") : null;
|
|
8867
9623
|
if (current !== record.content) {
|
|
8868
9624
|
changedPages.push(record.page.path);
|
|
8869
9625
|
changedFiles.push({ relativePath: record.page.path, content: record.content });
|
|
@@ -8888,9 +9644,10 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8888
9644
|
await writePage(paths.wikiDir, record.page.path, record.content, writeChanges);
|
|
8889
9645
|
}
|
|
8890
9646
|
for (const relativePath of obsoletePaths) {
|
|
8891
|
-
await
|
|
9647
|
+
await fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true });
|
|
8892
9648
|
}
|
|
8893
9649
|
await writeJsonFile(paths.graphPath, graph);
|
|
9650
|
+
await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
|
|
8894
9651
|
await writeJsonFile(paths.codeIndexPath, input.codeIndex);
|
|
8895
9652
|
await writeJsonFile(paths.compileStatePath, {
|
|
8896
9653
|
generatedAt: graph.generatedAt,
|
|
@@ -8916,7 +9673,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8916
9673
|
return {
|
|
8917
9674
|
graph,
|
|
8918
9675
|
allPages,
|
|
8919
|
-
changedPages:
|
|
9676
|
+
changedPages: uniqueStrings3([...changedPages, ...writeChanges]),
|
|
8920
9677
|
promotedPageIds,
|
|
8921
9678
|
candidatePageCount: candidatePages.length,
|
|
8922
9679
|
staged: false
|
|
@@ -8925,18 +9682,20 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
8925
9682
|
async function refreshIndexesAndSearch(rootDir, pages) {
|
|
8926
9683
|
const { config, paths } = await loadVaultConfig(rootDir);
|
|
8927
9684
|
const schemas = await loadVaultSchemas(rootDir);
|
|
9685
|
+
const compileState = await readJsonFile(paths.compileStatePath);
|
|
8928
9686
|
const globalSchemaHash = schemas.effective.global.hash;
|
|
8929
9687
|
const currentGraph = await readJsonFile(paths.graphPath);
|
|
8930
9688
|
const basePages = pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary");
|
|
8931
|
-
const
|
|
9689
|
+
const graphOrientation = currentGraph ? await buildGraphOrientationPages(
|
|
8932
9690
|
{
|
|
8933
9691
|
...currentGraph,
|
|
8934
9692
|
pages: basePages
|
|
8935
9693
|
},
|
|
8936
9694
|
paths,
|
|
8937
|
-
globalSchemaHash
|
|
8938
|
-
|
|
8939
|
-
|
|
9695
|
+
globalSchemaHash,
|
|
9696
|
+
compileState?.generatedAt
|
|
9697
|
+
) : { records: [], report: null };
|
|
9698
|
+
const pagesWithGraph = sortGraphPages([...basePages, ...graphOrientation.records.map((record) => record.page)]);
|
|
8940
9699
|
if (currentGraph) {
|
|
8941
9700
|
await writeJsonFile(paths.graphPath, {
|
|
8942
9701
|
...currentGraph,
|
|
@@ -8959,17 +9718,17 @@ async function refreshIndexesAndSearch(rootDir, pages) {
|
|
|
8959
9718
|
})
|
|
8960
9719
|
);
|
|
8961
9720
|
await Promise.all([
|
|
8962
|
-
ensureDir(
|
|
8963
|
-
ensureDir(
|
|
8964
|
-
ensureDir(
|
|
8965
|
-
ensureDir(
|
|
8966
|
-
ensureDir(
|
|
8967
|
-
ensureDir(
|
|
8968
|
-
ensureDir(
|
|
8969
|
-
ensureDir(
|
|
8970
|
-
ensureDir(
|
|
9721
|
+
ensureDir(path18.join(paths.wikiDir, "sources")),
|
|
9722
|
+
ensureDir(path18.join(paths.wikiDir, "code")),
|
|
9723
|
+
ensureDir(path18.join(paths.wikiDir, "concepts")),
|
|
9724
|
+
ensureDir(path18.join(paths.wikiDir, "entities")),
|
|
9725
|
+
ensureDir(path18.join(paths.wikiDir, "outputs")),
|
|
9726
|
+
ensureDir(path18.join(paths.wikiDir, "graph")),
|
|
9727
|
+
ensureDir(path18.join(paths.wikiDir, "graph", "communities")),
|
|
9728
|
+
ensureDir(path18.join(paths.wikiDir, "projects")),
|
|
9729
|
+
ensureDir(path18.join(paths.wikiDir, "candidates"))
|
|
8971
9730
|
]);
|
|
8972
|
-
const projectsIndexPath =
|
|
9731
|
+
const projectsIndexPath = path18.join(paths.wikiDir, "projects", "index.md");
|
|
8973
9732
|
await writeFileIfChanged(
|
|
8974
9733
|
projectsIndexPath,
|
|
8975
9734
|
await buildManagedContent(
|
|
@@ -8990,7 +9749,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
|
|
|
8990
9749
|
outputs: pages.filter((page) => page.kind === "output" && page.projectIds.includes(project.id)),
|
|
8991
9750
|
candidates: pages.filter((page) => page.status === "candidate" && page.projectIds.includes(project.id))
|
|
8992
9751
|
};
|
|
8993
|
-
const absolutePath =
|
|
9752
|
+
const absolutePath = path18.join(paths.wikiDir, "projects", project.id, "index.md");
|
|
8994
9753
|
await writeFileIfChanged(
|
|
8995
9754
|
absolutePath,
|
|
8996
9755
|
await buildManagedContent(
|
|
@@ -9008,7 +9767,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
|
|
|
9008
9767
|
)
|
|
9009
9768
|
);
|
|
9010
9769
|
}
|
|
9011
|
-
const rootIndexPath =
|
|
9770
|
+
const rootIndexPath = path18.join(paths.wikiDir, "index.md");
|
|
9012
9771
|
await writeFileIfChanged(
|
|
9013
9772
|
rootIndexPath,
|
|
9014
9773
|
await buildManagedContent(
|
|
@@ -9029,7 +9788,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
|
|
|
9029
9788
|
["candidates/index.md", "candidates", pagesWithGraph.filter((page) => page.status === "candidate")],
|
|
9030
9789
|
["graph/index.md", "graph", pagesWithGraph.filter((page) => page.kind === "graph_report" || page.kind === "community_summary")]
|
|
9031
9790
|
]) {
|
|
9032
|
-
const absolutePath =
|
|
9791
|
+
const absolutePath = path18.join(paths.wikiDir, relativePath);
|
|
9033
9792
|
await writeFileIfChanged(
|
|
9034
9793
|
absolutePath,
|
|
9035
9794
|
await buildManagedContent(
|
|
@@ -9042,21 +9801,24 @@ async function refreshIndexesAndSearch(rootDir, pages) {
|
|
|
9042
9801
|
)
|
|
9043
9802
|
);
|
|
9044
9803
|
}
|
|
9045
|
-
for (const record of
|
|
9046
|
-
await writeFileIfChanged(
|
|
9804
|
+
for (const record of graphOrientation.records) {
|
|
9805
|
+
await writeFileIfChanged(path18.join(paths.wikiDir, record.page.path), record.content);
|
|
9047
9806
|
}
|
|
9048
|
-
|
|
9807
|
+
if (graphOrientation.report) {
|
|
9808
|
+
await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
|
|
9809
|
+
}
|
|
9810
|
+
const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
|
|
9049
9811
|
const allowedProjectIndexPaths = /* @__PURE__ */ new Set([
|
|
9050
9812
|
"projects/index.md",
|
|
9051
9813
|
...configuredProjects.map((project) => `projects/${project.id}/index.md`)
|
|
9052
9814
|
]);
|
|
9053
9815
|
await Promise.all(
|
|
9054
|
-
existingProjectIndexPaths.filter((relativePath) => !allowedProjectIndexPaths.has(relativePath)).map((relativePath) =>
|
|
9816
|
+
existingProjectIndexPaths.filter((relativePath) => !allowedProjectIndexPaths.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
|
|
9055
9817
|
);
|
|
9056
|
-
const existingGraphPages = (await listFilesRecursive(
|
|
9057
|
-
const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...
|
|
9818
|
+
const existingGraphPages = (await listFilesRecursive(path18.join(paths.wikiDir, "graph").replace(/\/$/, "")).catch(() => [])).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
|
|
9819
|
+
const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...graphOrientation.records.map((record) => record.page.path)]);
|
|
9058
9820
|
await Promise.all(
|
|
9059
|
-
existingGraphPages.filter((relativePath) => !allowedGraphPages.has(relativePath)).map((relativePath) =>
|
|
9821
|
+
existingGraphPages.filter((relativePath) => !allowedGraphPages.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
|
|
9060
9822
|
);
|
|
9061
9823
|
await rebuildSearchIndex(paths.searchDbPath, pagesWithGraph, paths.wikiDir);
|
|
9062
9824
|
}
|
|
@@ -9071,12 +9833,12 @@ async function prepareOutputPageSave(rootDir, input) {
|
|
|
9071
9833
|
status: "active",
|
|
9072
9834
|
createdAt: now,
|
|
9073
9835
|
updatedAt: now,
|
|
9074
|
-
compiledFrom:
|
|
9836
|
+
compiledFrom: uniqueStrings3(input.relatedSourceIds ?? input.citations),
|
|
9075
9837
|
managedBy: "system",
|
|
9076
9838
|
confidence: 0.74
|
|
9077
9839
|
}
|
|
9078
9840
|
});
|
|
9079
|
-
const absolutePath =
|
|
9841
|
+
const absolutePath = path18.join(paths.wikiDir, output.page.path);
|
|
9080
9842
|
return {
|
|
9081
9843
|
page: output.page,
|
|
9082
9844
|
savedPath: absolutePath,
|
|
@@ -9088,15 +9850,15 @@ async function prepareOutputPageSave(rootDir, input) {
|
|
|
9088
9850
|
async function persistOutputPage(rootDir, input) {
|
|
9089
9851
|
const { paths } = await loadVaultConfig(rootDir);
|
|
9090
9852
|
const prepared = await prepareOutputPageSave(rootDir, input);
|
|
9091
|
-
await ensureDir(
|
|
9092
|
-
await
|
|
9853
|
+
await ensureDir(path18.dirname(prepared.savedPath));
|
|
9854
|
+
await fs15.writeFile(prepared.savedPath, prepared.content, "utf8");
|
|
9093
9855
|
for (const assetFile of prepared.assetFiles) {
|
|
9094
|
-
const assetPath =
|
|
9095
|
-
await ensureDir(
|
|
9856
|
+
const assetPath = path18.join(paths.wikiDir, assetFile.relativePath);
|
|
9857
|
+
await ensureDir(path18.dirname(assetPath));
|
|
9096
9858
|
if (typeof assetFile.content === "string") {
|
|
9097
|
-
await
|
|
9859
|
+
await fs15.writeFile(assetPath, assetFile.content, assetFile.encoding ?? "utf8");
|
|
9098
9860
|
} else {
|
|
9099
|
-
await
|
|
9861
|
+
await fs15.writeFile(assetPath, assetFile.content);
|
|
9100
9862
|
}
|
|
9101
9863
|
}
|
|
9102
9864
|
return { page: prepared.page, savedPath: prepared.savedPath, outputAssets: prepared.outputAssets };
|
|
@@ -9112,12 +9874,12 @@ async function prepareExploreHubSave(rootDir, input) {
|
|
|
9112
9874
|
status: "active",
|
|
9113
9875
|
createdAt: now,
|
|
9114
9876
|
updatedAt: now,
|
|
9115
|
-
compiledFrom:
|
|
9877
|
+
compiledFrom: uniqueStrings3(input.citations),
|
|
9116
9878
|
managedBy: "system",
|
|
9117
9879
|
confidence: 0.76
|
|
9118
9880
|
}
|
|
9119
9881
|
});
|
|
9120
|
-
const absolutePath =
|
|
9882
|
+
const absolutePath = path18.join(paths.wikiDir, hub.page.path);
|
|
9121
9883
|
return {
|
|
9122
9884
|
page: hub.page,
|
|
9123
9885
|
savedPath: absolutePath,
|
|
@@ -9129,15 +9891,15 @@ async function prepareExploreHubSave(rootDir, input) {
|
|
|
9129
9891
|
async function persistExploreHub(rootDir, input) {
|
|
9130
9892
|
const { paths } = await loadVaultConfig(rootDir);
|
|
9131
9893
|
const prepared = await prepareExploreHubSave(rootDir, input);
|
|
9132
|
-
await ensureDir(
|
|
9133
|
-
await
|
|
9894
|
+
await ensureDir(path18.dirname(prepared.savedPath));
|
|
9895
|
+
await fs15.writeFile(prepared.savedPath, prepared.content, "utf8");
|
|
9134
9896
|
for (const assetFile of prepared.assetFiles) {
|
|
9135
|
-
const assetPath =
|
|
9136
|
-
await ensureDir(
|
|
9897
|
+
const assetPath = path18.join(paths.wikiDir, assetFile.relativePath);
|
|
9898
|
+
await ensureDir(path18.dirname(assetPath));
|
|
9137
9899
|
if (typeof assetFile.content === "string") {
|
|
9138
|
-
await
|
|
9900
|
+
await fs15.writeFile(assetPath, assetFile.content, assetFile.encoding ?? "utf8");
|
|
9139
9901
|
} else {
|
|
9140
|
-
await
|
|
9902
|
+
await fs15.writeFile(assetPath, assetFile.content);
|
|
9141
9903
|
}
|
|
9142
9904
|
}
|
|
9143
9905
|
return { page: prepared.page, savedPath: prepared.savedPath, outputAssets: prepared.outputAssets };
|
|
@@ -9154,17 +9916,17 @@ async function stageOutputApprovalBundle(rootDir, stagedPages) {
|
|
|
9154
9916
|
}))
|
|
9155
9917
|
]);
|
|
9156
9918
|
const approvalId = `schedule-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}`;
|
|
9157
|
-
const approvalDir =
|
|
9919
|
+
const approvalDir = path18.join(paths.approvalsDir, approvalId);
|
|
9158
9920
|
await ensureDir(approvalDir);
|
|
9159
|
-
await ensureDir(
|
|
9160
|
-
await ensureDir(
|
|
9921
|
+
await ensureDir(path18.join(approvalDir, "wiki"));
|
|
9922
|
+
await ensureDir(path18.join(approvalDir, "state"));
|
|
9161
9923
|
for (const file of changedFiles) {
|
|
9162
|
-
const targetPath =
|
|
9163
|
-
await ensureDir(
|
|
9924
|
+
const targetPath = path18.join(approvalDir, "wiki", file.relativePath);
|
|
9925
|
+
await ensureDir(path18.dirname(targetPath));
|
|
9164
9926
|
if ("binary" in file && file.binary) {
|
|
9165
|
-
await
|
|
9927
|
+
await fs15.writeFile(targetPath, Buffer.from(file.content, "base64"));
|
|
9166
9928
|
} else {
|
|
9167
|
-
await
|
|
9929
|
+
await fs15.writeFile(targetPath, file.content, "utf8");
|
|
9168
9930
|
}
|
|
9169
9931
|
}
|
|
9170
9932
|
const nextPages = sortGraphPages([
|
|
@@ -9178,7 +9940,7 @@ async function stageOutputApprovalBundle(rootDir, stagedPages) {
|
|
|
9178
9940
|
sources: previousGraph?.sources ?? [],
|
|
9179
9941
|
pages: nextPages
|
|
9180
9942
|
};
|
|
9181
|
-
await
|
|
9943
|
+
await fs15.writeFile(path18.join(approvalDir, "state", "graph.json"), JSON.stringify(graph, null, 2), "utf8");
|
|
9182
9944
|
await writeApprovalManifest(paths, {
|
|
9183
9945
|
approvalId,
|
|
9184
9946
|
createdAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -9207,10 +9969,10 @@ async function executeQuery(rootDir, question, format) {
|
|
|
9207
9969
|
const searchResults = searchPages(paths.searchDbPath, question, 5);
|
|
9208
9970
|
const excerpts = await Promise.all(
|
|
9209
9971
|
searchResults.map(async (result) => {
|
|
9210
|
-
const absolutePath =
|
|
9972
|
+
const absolutePath = path18.join(paths.wikiDir, result.path);
|
|
9211
9973
|
try {
|
|
9212
|
-
const content = await
|
|
9213
|
-
const parsed =
|
|
9974
|
+
const content = await fs15.readFile(absolutePath, "utf8");
|
|
9975
|
+
const parsed = matter9(content);
|
|
9214
9976
|
return `# ${result.title}
|
|
9215
9977
|
${truncate(normalizeWhitespace(parsed.content), 1200)}`;
|
|
9216
9978
|
} catch {
|
|
@@ -9307,8 +10069,8 @@ async function generateFollowUpQuestions(rootDir, question, answer) {
|
|
|
9307
10069
|
Current answer:
|
|
9308
10070
|
${answer}`
|
|
9309
10071
|
},
|
|
9310
|
-
|
|
9311
|
-
questions:
|
|
10072
|
+
z7.object({
|
|
10073
|
+
questions: z7.array(z7.string().min(1)).max(5)
|
|
9312
10074
|
})
|
|
9313
10075
|
);
|
|
9314
10076
|
return uniqueBy(response.questions, (item) => item).filter((item) => item !== question);
|
|
@@ -9391,7 +10153,7 @@ function sortGraphPages(pages) {
|
|
|
9391
10153
|
async function listApprovals(rootDir) {
|
|
9392
10154
|
const { paths } = await loadVaultConfig(rootDir);
|
|
9393
10155
|
const manifests = await Promise.all(
|
|
9394
|
-
(await
|
|
10156
|
+
(await fs15.readdir(paths.approvalsDir, { withFileTypes: true }).catch(() => [])).filter((entry) => entry.isDirectory()).map(async (entry) => {
|
|
9395
10157
|
try {
|
|
9396
10158
|
return await readApprovalManifest(paths, entry.name);
|
|
9397
10159
|
} catch {
|
|
@@ -9407,8 +10169,8 @@ async function readApproval(rootDir, approvalId) {
|
|
|
9407
10169
|
const details = await Promise.all(
|
|
9408
10170
|
manifest.entries.map(async (entry) => {
|
|
9409
10171
|
const currentPath = entry.previousPath ?? entry.nextPath;
|
|
9410
|
-
const currentContent = currentPath ? await
|
|
9411
|
-
const stagedContent = entry.nextPath ? await
|
|
10172
|
+
const currentContent = currentPath ? await fs15.readFile(path18.join(paths.wikiDir, currentPath), "utf8").catch(() => void 0) : void 0;
|
|
10173
|
+
const stagedContent = entry.nextPath ? await fs15.readFile(path18.join(paths.approvalsDir, approvalId, "wiki", entry.nextPath), "utf8").catch(() => void 0) : void 0;
|
|
9412
10174
|
return {
|
|
9413
10175
|
...entry,
|
|
9414
10176
|
currentContent,
|
|
@@ -9436,26 +10198,26 @@ async function acceptApproval(rootDir, approvalId, targets = []) {
|
|
|
9436
10198
|
if (!entry.nextPath) {
|
|
9437
10199
|
throw new Error(`Approval entry ${entry.pageId} is missing a staged path.`);
|
|
9438
10200
|
}
|
|
9439
|
-
const stagedAbsolutePath =
|
|
9440
|
-
const stagedContent = await
|
|
9441
|
-
const targetAbsolutePath =
|
|
9442
|
-
await ensureDir(
|
|
9443
|
-
await
|
|
10201
|
+
const stagedAbsolutePath = path18.join(paths.approvalsDir, approvalId, "wiki", entry.nextPath);
|
|
10202
|
+
const stagedContent = await fs15.readFile(stagedAbsolutePath, "utf8");
|
|
10203
|
+
const targetAbsolutePath = path18.join(paths.wikiDir, entry.nextPath);
|
|
10204
|
+
await ensureDir(path18.dirname(targetAbsolutePath));
|
|
10205
|
+
await fs15.writeFile(targetAbsolutePath, stagedContent, "utf8");
|
|
9444
10206
|
if (entry.changeType === "promote" && entry.previousPath) {
|
|
9445
|
-
await
|
|
10207
|
+
await fs15.rm(path18.join(paths.wikiDir, entry.previousPath), { force: true });
|
|
9446
10208
|
}
|
|
9447
10209
|
const nextPage = bundleGraph?.pages.find((page) => page.id === entry.pageId && page.path === entry.nextPath) ?? parseStoredPage(entry.nextPath, stagedContent);
|
|
9448
10210
|
if (nextPage.kind === "output" && nextPage.outputAssets?.length) {
|
|
9449
|
-
const outputAssetDir =
|
|
9450
|
-
await
|
|
10211
|
+
const outputAssetDir = path18.join(paths.wikiDir, "outputs", "assets", path18.basename(nextPage.path, ".md"));
|
|
10212
|
+
await fs15.rm(outputAssetDir, { recursive: true, force: true });
|
|
9451
10213
|
for (const asset of nextPage.outputAssets) {
|
|
9452
|
-
const stagedAssetPath =
|
|
10214
|
+
const stagedAssetPath = path18.join(paths.approvalsDir, approvalId, "wiki", asset.path);
|
|
9453
10215
|
if (!await fileExists(stagedAssetPath)) {
|
|
9454
10216
|
continue;
|
|
9455
10217
|
}
|
|
9456
|
-
const targetAssetPath =
|
|
9457
|
-
await ensureDir(
|
|
9458
|
-
await
|
|
10218
|
+
const targetAssetPath = path18.join(paths.wikiDir, asset.path);
|
|
10219
|
+
await ensureDir(path18.dirname(targetAssetPath));
|
|
10220
|
+
await fs15.copyFile(stagedAssetPath, targetAssetPath);
|
|
9459
10221
|
}
|
|
9460
10222
|
}
|
|
9461
10223
|
nextPages = nextPages.filter(
|
|
@@ -9466,10 +10228,10 @@ async function acceptApproval(rootDir, approvalId, targets = []) {
|
|
|
9466
10228
|
} else {
|
|
9467
10229
|
const deletedPage = nextPages.find((page) => page.id === entry.pageId || page.path === entry.previousPath) ?? bundleGraph?.pages.find((page) => page.id === entry.pageId || page.path === entry.previousPath) ?? null;
|
|
9468
10230
|
if (entry.previousPath) {
|
|
9469
|
-
await
|
|
10231
|
+
await fs15.rm(path18.join(paths.wikiDir, entry.previousPath), { force: true });
|
|
9470
10232
|
}
|
|
9471
10233
|
if (deletedPage?.kind === "output") {
|
|
9472
|
-
await
|
|
10234
|
+
await fs15.rm(path18.join(paths.wikiDir, "outputs", "assets", path18.basename(deletedPage.path, ".md")), {
|
|
9473
10235
|
recursive: true,
|
|
9474
10236
|
force: true
|
|
9475
10237
|
});
|
|
@@ -9559,22 +10321,22 @@ async function promoteCandidate(rootDir, target) {
|
|
|
9559
10321
|
const { paths } = await loadVaultConfig(rootDir);
|
|
9560
10322
|
const graph = await readJsonFile(paths.graphPath);
|
|
9561
10323
|
const candidate = resolveCandidateTarget(graph?.pages ?? [], target);
|
|
9562
|
-
const raw = await
|
|
9563
|
-
const parsed =
|
|
10324
|
+
const raw = await fs15.readFile(path18.join(paths.wikiDir, candidate.path), "utf8");
|
|
10325
|
+
const parsed = matter9(raw);
|
|
9564
10326
|
const nextUpdatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
9565
|
-
const nextContent =
|
|
10327
|
+
const nextContent = matter9.stringify(parsed.content, {
|
|
9566
10328
|
...parsed.data,
|
|
9567
10329
|
status: "active",
|
|
9568
10330
|
updated_at: nextUpdatedAt,
|
|
9569
|
-
tags:
|
|
10331
|
+
tags: uniqueStrings3([candidate.kind, ...Array.isArray(parsed.data.tags) ? parsed.data.tags : []]).filter(
|
|
9570
10332
|
(tag) => tag !== "candidate"
|
|
9571
10333
|
)
|
|
9572
10334
|
});
|
|
9573
10335
|
const nextPath = candidateActivePath(candidate);
|
|
9574
|
-
const nextAbsolutePath =
|
|
9575
|
-
await ensureDir(
|
|
9576
|
-
await
|
|
9577
|
-
await
|
|
10336
|
+
const nextAbsolutePath = path18.join(paths.wikiDir, nextPath);
|
|
10337
|
+
await ensureDir(path18.dirname(nextAbsolutePath));
|
|
10338
|
+
await fs15.writeFile(nextAbsolutePath, nextContent, "utf8");
|
|
10339
|
+
await fs15.rm(path18.join(paths.wikiDir, candidate.path), { force: true });
|
|
9578
10340
|
const nextPage = parseStoredPage(nextPath, nextContent, { createdAt: candidate.createdAt, updatedAt: nextUpdatedAt });
|
|
9579
10341
|
const nextPages = sortGraphPages(
|
|
9580
10342
|
(graph?.pages ?? []).filter((page) => page.id !== candidate.id && page.path !== candidate.path).concat(nextPage)
|
|
@@ -9618,7 +10380,7 @@ async function archiveCandidate(rootDir, target) {
|
|
|
9618
10380
|
const { paths } = await loadVaultConfig(rootDir);
|
|
9619
10381
|
const graph = await readJsonFile(paths.graphPath);
|
|
9620
10382
|
const candidate = resolveCandidateTarget(graph?.pages ?? [], target);
|
|
9621
|
-
await
|
|
10383
|
+
await fs15.rm(path18.join(paths.wikiDir, candidate.path), { force: true });
|
|
9622
10384
|
const nextPages = sortGraphPages((graph?.pages ?? []).filter((page) => page.id !== candidate.id && page.path !== candidate.path));
|
|
9623
10385
|
const nextGraph = {
|
|
9624
10386
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
@@ -9656,18 +10418,18 @@ async function archiveCandidate(rootDir, target) {
|
|
|
9656
10418
|
}
|
|
9657
10419
|
async function ensureObsidianWorkspace(rootDir) {
|
|
9658
10420
|
const { config } = await loadVaultConfig(rootDir);
|
|
9659
|
-
const obsidianDir =
|
|
10421
|
+
const obsidianDir = path18.join(rootDir, ".obsidian");
|
|
9660
10422
|
const projectIds = projectEntries(config).map((project) => project.id);
|
|
9661
10423
|
await ensureDir(obsidianDir);
|
|
9662
10424
|
await Promise.all([
|
|
9663
|
-
writeJsonFile(
|
|
10425
|
+
writeJsonFile(path18.join(obsidianDir, "app.json"), {
|
|
9664
10426
|
alwaysUpdateLinks: true,
|
|
9665
10427
|
newFileLocation: "folder",
|
|
9666
10428
|
newFileFolderPath: "wiki/insights",
|
|
9667
10429
|
useMarkdownLinks: false,
|
|
9668
10430
|
attachmentFolderPath: "raw/assets"
|
|
9669
10431
|
}),
|
|
9670
|
-
writeJsonFile(
|
|
10432
|
+
writeJsonFile(path18.join(obsidianDir, "core-plugins.json"), [
|
|
9671
10433
|
"file-explorer",
|
|
9672
10434
|
"global-search",
|
|
9673
10435
|
"switcher",
|
|
@@ -9677,7 +10439,7 @@ async function ensureObsidianWorkspace(rootDir) {
|
|
|
9677
10439
|
"tag-pane",
|
|
9678
10440
|
"page-preview"
|
|
9679
10441
|
]),
|
|
9680
|
-
writeJsonFile(
|
|
10442
|
+
writeJsonFile(path18.join(obsidianDir, "graph.json"), {
|
|
9681
10443
|
"collapse-filter": false,
|
|
9682
10444
|
search: "",
|
|
9683
10445
|
showTags: true,
|
|
@@ -9689,7 +10451,7 @@ async function ensureObsidianWorkspace(rootDir) {
|
|
|
9689
10451
|
})),
|
|
9690
10452
|
localJumps: false
|
|
9691
10453
|
}),
|
|
9692
|
-
writeJsonFile(
|
|
10454
|
+
writeJsonFile(path18.join(obsidianDir, "workspace.json"), {
|
|
9693
10455
|
active: "root",
|
|
9694
10456
|
lastOpenFiles: ["wiki/index.md", "wiki/projects/index.md", "wiki/candidates/index.md", "wiki/insights/index.md"],
|
|
9695
10457
|
left: {
|
|
@@ -9704,11 +10466,11 @@ async function ensureObsidianWorkspace(rootDir) {
|
|
|
9704
10466
|
async function initVault(rootDir, options = {}) {
|
|
9705
10467
|
const { paths } = await initWorkspace(rootDir);
|
|
9706
10468
|
await installConfiguredAgents(rootDir);
|
|
9707
|
-
const insightsIndexPath =
|
|
10469
|
+
const insightsIndexPath = path18.join(paths.wikiDir, "insights", "index.md");
|
|
9708
10470
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
9709
10471
|
await writeFileIfChanged(
|
|
9710
10472
|
insightsIndexPath,
|
|
9711
|
-
|
|
10473
|
+
matter9.stringify(
|
|
9712
10474
|
[
|
|
9713
10475
|
"# Insights",
|
|
9714
10476
|
"",
|
|
@@ -9740,8 +10502,8 @@ async function initVault(rootDir, options = {}) {
|
|
|
9740
10502
|
)
|
|
9741
10503
|
);
|
|
9742
10504
|
await writeFileIfChanged(
|
|
9743
|
-
|
|
9744
|
-
|
|
10505
|
+
path18.join(paths.wikiDir, "projects", "index.md"),
|
|
10506
|
+
matter9.stringify(["# Projects", "", "- Run `swarmvault compile` to build project rollups.", ""].join("\n"), {
|
|
9745
10507
|
page_id: "projects:index",
|
|
9746
10508
|
kind: "index",
|
|
9747
10509
|
title: "Projects",
|
|
@@ -9762,8 +10524,8 @@ async function initVault(rootDir, options = {}) {
|
|
|
9762
10524
|
})
|
|
9763
10525
|
);
|
|
9764
10526
|
await writeFileIfChanged(
|
|
9765
|
-
|
|
9766
|
-
|
|
10527
|
+
path18.join(paths.wikiDir, "candidates", "index.md"),
|
|
10528
|
+
matter9.stringify(["# Candidates", "", "- Run `swarmvault compile` to stage candidate pages.", ""].join("\n"), {
|
|
9767
10529
|
page_id: "candidates:index",
|
|
9768
10530
|
kind: "index",
|
|
9769
10531
|
title: "Candidates",
|
|
@@ -9787,6 +10549,20 @@ async function initVault(rootDir, options = {}) {
|
|
|
9787
10549
|
await ensureObsidianWorkspace(rootDir);
|
|
9788
10550
|
}
|
|
9789
10551
|
}
|
|
10552
|
+
async function runConfiguredBenchmark(rootDir, config) {
|
|
10553
|
+
if (config.benchmark?.enabled === false) {
|
|
10554
|
+
return { ok: true };
|
|
10555
|
+
}
|
|
10556
|
+
try {
|
|
10557
|
+
await benchmarkVault(rootDir);
|
|
10558
|
+
return { ok: true };
|
|
10559
|
+
} catch (error) {
|
|
10560
|
+
return {
|
|
10561
|
+
ok: false,
|
|
10562
|
+
error: error instanceof Error ? error.message : String(error)
|
|
10563
|
+
};
|
|
10564
|
+
}
|
|
10565
|
+
}
|
|
9790
10566
|
async function compileVault(rootDir, options = {}) {
|
|
9791
10567
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
9792
10568
|
const { config, paths } = await initWorkspace(rootDir);
|
|
@@ -9802,7 +10578,7 @@ async function compileVault(rootDir, options = {}) {
|
|
|
9802
10578
|
const currentInsightHashes = pageHashes(storedInsightPages);
|
|
9803
10579
|
const previousState = await readJsonFile(paths.compileStatePath);
|
|
9804
10580
|
const rootSchemaChanged = !previousState || previousState.rootSchemaHash !== schemas.root.hash;
|
|
9805
|
-
const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash ||
|
|
10581
|
+
const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash || uniqueStrings3([...Object.keys(previousState?.effectiveSchemaHashes?.projects ?? {}), ...Object.keys(schemas.effective.projects)]).some(
|
|
9806
10582
|
(projectId) => previousProjectSchemaHash(previousState, projectId) !== effectiveHashForProject(schemas, projectId)
|
|
9807
10583
|
);
|
|
9808
10584
|
const nextProjectConfigHash = projectConfigHash(config);
|
|
@@ -9835,6 +10611,10 @@ async function compileVault(rootDir, options = {}) {
|
|
|
9835
10611
|
}
|
|
9836
10612
|
if (dirty.length === 0 && !rootSchemaChanged && !effectiveSchemaChanged && !projectConfigChanged && !sourcesChanged && !outputsChanged && !insightsChanged && !pendingCandidatePromotion && artifactsExist && !options.approve) {
|
|
9837
10613
|
const graph = await readJsonFile(paths.graphPath);
|
|
10614
|
+
const benchmark2 = await runConfiguredBenchmark(rootDir, config);
|
|
10615
|
+
if (graph && benchmark2.ok) {
|
|
10616
|
+
await refreshIndexesAndSearch(rootDir, graph.pages);
|
|
10617
|
+
}
|
|
9838
10618
|
await recordSession(rootDir, {
|
|
9839
10619
|
operation: "compile",
|
|
9840
10620
|
title: `Compiled ${manifests.length} source(s)`,
|
|
@@ -9852,7 +10632,8 @@ async function compileVault(rootDir, options = {}) {
|
|
|
9852
10632
|
`clean=${manifests.length}`,
|
|
9853
10633
|
`outputs=${outputPages.length}`,
|
|
9854
10634
|
`insights=${insightPages.length}`,
|
|
9855
|
-
`schema=${schemas.effective.global.hash.slice(0, 12)}
|
|
10635
|
+
`schema=${schemas.effective.global.hash.slice(0, 12)}`,
|
|
10636
|
+
`benchmark=${benchmark2.ok ? "ok" : `error:${benchmark2.error}`}`
|
|
9856
10637
|
]
|
|
9857
10638
|
});
|
|
9858
10639
|
return {
|
|
@@ -9879,7 +10660,7 @@ async function compileVault(rootDir, options = {}) {
|
|
|
9879
10660
|
),
|
|
9880
10661
|
Promise.all(
|
|
9881
10662
|
clean.map(async (manifest) => {
|
|
9882
|
-
const cached = await readJsonFile(
|
|
10663
|
+
const cached = await readJsonFile(path18.join(paths.analysesDir, `${manifest.sourceId}.json`));
|
|
9883
10664
|
if (cached) {
|
|
9884
10665
|
return cached;
|
|
9885
10666
|
}
|
|
@@ -9903,22 +10684,22 @@ async function compileVault(rootDir, options = {}) {
|
|
|
9903
10684
|
}
|
|
9904
10685
|
const enriched = enrichResolvedCodeImports(manifest, analysis, codeIndex);
|
|
9905
10686
|
if (analysisSignature(enriched) !== analysisSignature(analysis)) {
|
|
9906
|
-
await writeJsonFile(
|
|
10687
|
+
await writeJsonFile(path18.join(paths.analysesDir, `${analysis.sourceId}.json`), enriched);
|
|
9907
10688
|
}
|
|
9908
10689
|
return enriched;
|
|
9909
10690
|
})
|
|
9910
10691
|
);
|
|
9911
10692
|
await Promise.all([
|
|
9912
|
-
ensureDir(
|
|
9913
|
-
ensureDir(
|
|
9914
|
-
ensureDir(
|
|
9915
|
-
ensureDir(
|
|
9916
|
-
ensureDir(
|
|
9917
|
-
ensureDir(
|
|
9918
|
-
ensureDir(
|
|
9919
|
-
ensureDir(
|
|
9920
|
-
ensureDir(
|
|
9921
|
-
ensureDir(
|
|
10693
|
+
ensureDir(path18.join(paths.wikiDir, "sources")),
|
|
10694
|
+
ensureDir(path18.join(paths.wikiDir, "code")),
|
|
10695
|
+
ensureDir(path18.join(paths.wikiDir, "concepts")),
|
|
10696
|
+
ensureDir(path18.join(paths.wikiDir, "entities")),
|
|
10697
|
+
ensureDir(path18.join(paths.wikiDir, "outputs")),
|
|
10698
|
+
ensureDir(path18.join(paths.wikiDir, "projects")),
|
|
10699
|
+
ensureDir(path18.join(paths.wikiDir, "insights")),
|
|
10700
|
+
ensureDir(path18.join(paths.wikiDir, "candidates")),
|
|
10701
|
+
ensureDir(path18.join(paths.wikiDir, "candidates", "concepts")),
|
|
10702
|
+
ensureDir(path18.join(paths.wikiDir, "candidates", "entities"))
|
|
9922
10703
|
]);
|
|
9923
10704
|
const sync = await syncVaultArtifacts(rootDir, {
|
|
9924
10705
|
schemas,
|
|
@@ -9970,6 +10751,10 @@ async function compileVault(rootDir, options = {}) {
|
|
|
9970
10751
|
postPassApprovalDir = staged.approvalDir;
|
|
9971
10752
|
}
|
|
9972
10753
|
}
|
|
10754
|
+
const benchmark = options.approve ? { ok: true } : await runConfiguredBenchmark(rootDir, config);
|
|
10755
|
+
if (!options.approve && benchmark.ok) {
|
|
10756
|
+
await refreshIndexesAndSearch(rootDir, sync.allPages);
|
|
10757
|
+
}
|
|
9973
10758
|
await recordSession(rootDir, {
|
|
9974
10759
|
operation: "compile",
|
|
9975
10760
|
title: `Compiled ${manifests.length} source(s)`,
|
|
@@ -9991,7 +10776,8 @@ async function compileVault(rootDir, options = {}) {
|
|
|
9991
10776
|
`promoted=${sync.promotedPageIds.length}`,
|
|
9992
10777
|
`staged=${sync.staged}`,
|
|
9993
10778
|
`postPassApproval=${postPassApprovalId ?? "none"}`,
|
|
9994
|
-
`schema=${schemas.effective.global.hash.slice(0, 12)}
|
|
10779
|
+
`schema=${schemas.effective.global.hash.slice(0, 12)}`,
|
|
10780
|
+
`benchmark=${benchmark.ok ? "ok" : `error:${benchmark.error}`}`
|
|
9995
10781
|
]
|
|
9996
10782
|
});
|
|
9997
10783
|
return {
|
|
@@ -10060,7 +10846,7 @@ async function queryVault(rootDir, options) {
|
|
|
10060
10846
|
assetFiles: staged.assetFiles
|
|
10061
10847
|
}
|
|
10062
10848
|
]);
|
|
10063
|
-
stagedPath =
|
|
10849
|
+
stagedPath = path18.join(approval.approvalDir, "wiki", staged.page.path);
|
|
10064
10850
|
savedPageId = staged.page.id;
|
|
10065
10851
|
approvalId = approval.approvalId;
|
|
10066
10852
|
approvalDir = approval.approvalDir;
|
|
@@ -10261,7 +11047,7 @@ ${orchestrationNotes.join("\n")}
|
|
|
10261
11047
|
citations: allCitations,
|
|
10262
11048
|
format: outputFormat,
|
|
10263
11049
|
relatedPageCount: stepPages.length,
|
|
10264
|
-
relatedNodeCount:
|
|
11050
|
+
relatedNodeCount: uniqueStrings3(stepPages.flatMap((page) => page.nodeIds)).length,
|
|
10265
11051
|
projectId: stepPages[0]?.projectIds[0] ?? null
|
|
10266
11052
|
});
|
|
10267
11053
|
const hubInput = {
|
|
@@ -10271,7 +11057,7 @@ ${orchestrationNotes.join("\n")}
|
|
|
10271
11057
|
citations: allCitations,
|
|
10272
11058
|
schemaHash: composeVaultSchema(
|
|
10273
11059
|
schemas.root,
|
|
10274
|
-
|
|
11060
|
+
uniqueStrings3(stepPages.flatMap((page) => page.projectIds).sort((left, right) => left.localeCompare(right))).map((projectId) => schemas.projects[projectId]).filter((schema) => Boolean(schema?.hash))
|
|
10275
11061
|
).hash,
|
|
10276
11062
|
outputFormat,
|
|
10277
11063
|
outputAssets: hubAssetBundle.outputAssets,
|
|
@@ -10316,9 +11102,9 @@ ${orchestrationNotes.join("\n")}
|
|
|
10316
11102
|
approvalId = approval.approvalId;
|
|
10317
11103
|
approvalDir = approval.approvalDir;
|
|
10318
11104
|
stepResults.forEach((result, index) => {
|
|
10319
|
-
result.stagedPath =
|
|
11105
|
+
result.stagedPath = path18.join(approval.approvalDir, "wiki", stagedStepPages[index]?.page.path ?? "");
|
|
10320
11106
|
});
|
|
10321
|
-
stagedHubPath =
|
|
11107
|
+
stagedHubPath = path18.join(approval.approvalDir, "wiki", hubPage.path);
|
|
10322
11108
|
} else {
|
|
10323
11109
|
await refreshVaultAfterOutputSave(rootDir);
|
|
10324
11110
|
}
|
|
@@ -10331,7 +11117,7 @@ ${orchestrationNotes.join("\n")}
|
|
|
10331
11117
|
providerId: provider.id,
|
|
10332
11118
|
success: true,
|
|
10333
11119
|
relatedSourceIds: [...relatedSourceIds],
|
|
10334
|
-
relatedPageIds:
|
|
11120
|
+
relatedPageIds: uniqueStrings3([...relatedPageIds, ...stepPages.map((page) => page.id), hubPage.id]),
|
|
10335
11121
|
relatedNodeIds: [...relatedNodeIds],
|
|
10336
11122
|
citations: allCitations,
|
|
10337
11123
|
tokenUsage: tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0 ? {
|
|
@@ -10386,7 +11172,7 @@ async function queryGraphVault(rootDir, question, options = {}) {
|
|
|
10386
11172
|
return queryGraph(graph, question, searchResults, options);
|
|
10387
11173
|
}
|
|
10388
11174
|
async function benchmarkVault(rootDir, options = {}) {
|
|
10389
|
-
const { paths } = await loadVaultConfig(rootDir);
|
|
11175
|
+
const { config, paths } = await loadVaultConfig(rootDir);
|
|
10390
11176
|
const graph = await ensureCompiledGraph(rootDir);
|
|
10391
11177
|
const manifests = await listManifests(rootDir);
|
|
10392
11178
|
const pageContentsById = /* @__PURE__ */ new Map();
|
|
@@ -10398,15 +11184,17 @@ async function benchmarkVault(rootDir, options = {}) {
|
|
|
10398
11184
|
}
|
|
10399
11185
|
}
|
|
10400
11186
|
for (const page of graph.pages) {
|
|
10401
|
-
const absolutePath =
|
|
11187
|
+
const absolutePath = path18.join(paths.wikiDir, page.path);
|
|
10402
11188
|
if (!await fileExists(absolutePath)) {
|
|
10403
11189
|
continue;
|
|
10404
11190
|
}
|
|
10405
|
-
const parsed =
|
|
11191
|
+
const parsed = matter9(await fs15.readFile(absolutePath, "utf8"));
|
|
10406
11192
|
pageContentsById.set(page.id, parsed.content);
|
|
10407
11193
|
}
|
|
11194
|
+
const configuredQuestions = (config.benchmark?.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
|
|
11195
|
+
const maxQuestions = Math.max(1, options.maxQuestions ?? config.benchmark?.maxQuestions ?? 3);
|
|
10408
11196
|
const questions = (options.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
|
|
10409
|
-
const sampleQuestions = questions.length ? questions :
|
|
11197
|
+
const sampleQuestions = (questions.length ? questions : configuredQuestions.length ? configuredQuestions : defaultBenchmarkQuestionsForGraph(graph, maxQuestions)).slice(0, maxQuestions);
|
|
10410
11198
|
const perQuestion = sampleQuestions.map((question) => {
|
|
10411
11199
|
const searchResults = searchPages(paths.searchDbPath, question, { limit: 12 });
|
|
10412
11200
|
const result = queryGraph(graph, question, searchResults, { budget: 12 });
|
|
@@ -10416,6 +11204,7 @@ async function benchmarkVault(rootDir, options = {}) {
|
|
|
10416
11204
|
queryTokens: metrics.queryTokens,
|
|
10417
11205
|
reduction: metrics.reduction,
|
|
10418
11206
|
visitedNodeIds: result.visitedNodeIds,
|
|
11207
|
+
visitedEdgeIds: result.visitedEdgeIds,
|
|
10419
11208
|
pageIds: result.pageIds
|
|
10420
11209
|
};
|
|
10421
11210
|
});
|
|
@@ -10448,15 +11237,15 @@ async function listPages(rootDir) {
|
|
|
10448
11237
|
}
|
|
10449
11238
|
async function readPage(rootDir, relativePath) {
|
|
10450
11239
|
const { paths } = await loadVaultConfig(rootDir);
|
|
10451
|
-
const absolutePath =
|
|
11240
|
+
const absolutePath = path18.resolve(paths.wikiDir, relativePath);
|
|
10452
11241
|
if (!absolutePath.startsWith(paths.wikiDir) || !await fileExists(absolutePath)) {
|
|
10453
11242
|
return null;
|
|
10454
11243
|
}
|
|
10455
|
-
const raw = await
|
|
10456
|
-
const parsed =
|
|
11244
|
+
const raw = await fs15.readFile(absolutePath, "utf8");
|
|
11245
|
+
const parsed = matter9(raw);
|
|
10457
11246
|
return {
|
|
10458
11247
|
path: relativePath,
|
|
10459
|
-
title: typeof parsed.data.title === "string" ? parsed.data.title :
|
|
11248
|
+
title: typeof parsed.data.title === "string" ? parsed.data.title : path18.basename(relativePath, path18.extname(relativePath)),
|
|
10460
11249
|
frontmatter: parsed.data,
|
|
10461
11250
|
content: parsed.content
|
|
10462
11251
|
};
|
|
@@ -10492,7 +11281,7 @@ function structuralLintFindings(_rootDir, paths, graph, schemas, manifests, sour
|
|
|
10492
11281
|
severity: "warning",
|
|
10493
11282
|
code: "stale_page",
|
|
10494
11283
|
message: `Page ${page.title} is stale because the vault schema changed.`,
|
|
10495
|
-
pagePath:
|
|
11284
|
+
pagePath: path18.join(paths.wikiDir, page.path),
|
|
10496
11285
|
relatedPageIds: [page.id]
|
|
10497
11286
|
});
|
|
10498
11287
|
}
|
|
@@ -10503,7 +11292,7 @@ function structuralLintFindings(_rootDir, paths, graph, schemas, manifests, sour
|
|
|
10503
11292
|
severity: "warning",
|
|
10504
11293
|
code: "stale_page",
|
|
10505
11294
|
message: `Page ${page.title} is stale because source ${sourceId} changed.`,
|
|
10506
|
-
pagePath:
|
|
11295
|
+
pagePath: path18.join(paths.wikiDir, page.path),
|
|
10507
11296
|
relatedSourceIds: [sourceId],
|
|
10508
11297
|
relatedPageIds: [page.id]
|
|
10509
11298
|
});
|
|
@@ -10514,13 +11303,13 @@ function structuralLintFindings(_rootDir, paths, graph, schemas, manifests, sour
|
|
|
10514
11303
|
severity: "info",
|
|
10515
11304
|
code: "orphan_page",
|
|
10516
11305
|
message: `Page ${page.title} has no backlinks.`,
|
|
10517
|
-
pagePath:
|
|
11306
|
+
pagePath: path18.join(paths.wikiDir, page.path),
|
|
10518
11307
|
relatedPageIds: [page.id]
|
|
10519
11308
|
});
|
|
10520
11309
|
}
|
|
10521
|
-
const absolutePath =
|
|
11310
|
+
const absolutePath = path18.join(paths.wikiDir, page.path);
|
|
10522
11311
|
if (await fileExists(absolutePath)) {
|
|
10523
|
-
const content = await
|
|
11312
|
+
const content = await fs15.readFile(absolutePath, "utf8");
|
|
10524
11313
|
if (content.includes("## Claims")) {
|
|
10525
11314
|
const uncited = content.split("\n").filter((line) => line.startsWith("- ") && !line.includes("[source:"));
|
|
10526
11315
|
if (uncited.length) {
|
|
@@ -10580,7 +11369,7 @@ async function lintVault(rootDir, options = {}) {
|
|
|
10580
11369
|
providerId: provider?.id,
|
|
10581
11370
|
success: true,
|
|
10582
11371
|
relatedPageIds: graph.pages.map((page) => page.id),
|
|
10583
|
-
relatedSourceIds:
|
|
11372
|
+
relatedSourceIds: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
|
|
10584
11373
|
lintFindingCount: findings.length,
|
|
10585
11374
|
lines: [`findings=${findings.length}`, `deep=${Boolean(options.deep)}`, `web=${Boolean(options.web)}`]
|
|
10586
11375
|
});
|
|
@@ -10600,7 +11389,7 @@ async function bootstrapDemo(rootDir, input) {
|
|
|
10600
11389
|
}
|
|
10601
11390
|
|
|
10602
11391
|
// src/mcp.ts
|
|
10603
|
-
var SERVER_VERSION = "0.1.
|
|
11392
|
+
var SERVER_VERSION = "0.1.22";
|
|
10604
11393
|
async function createMcpServer(rootDir) {
|
|
10605
11394
|
const server = new McpServer({
|
|
10606
11395
|
name: "swarmvault",
|
|
@@ -10622,8 +11411,8 @@ async function createMcpServer(rootDir) {
|
|
|
10622
11411
|
{
|
|
10623
11412
|
description: "Search compiled wiki pages using the local full-text index.",
|
|
10624
11413
|
inputSchema: {
|
|
10625
|
-
query:
|
|
10626
|
-
limit:
|
|
11414
|
+
query: z8.string().min(1).describe("Search query"),
|
|
11415
|
+
limit: z8.number().int().min(1).max(25).optional().describe("Maximum number of results")
|
|
10627
11416
|
}
|
|
10628
11417
|
},
|
|
10629
11418
|
async ({ query, limit }) => {
|
|
@@ -10636,7 +11425,7 @@ async function createMcpServer(rootDir) {
|
|
|
10636
11425
|
{
|
|
10637
11426
|
description: "Read a generated wiki page by its path relative to wiki/.",
|
|
10638
11427
|
inputSchema: {
|
|
10639
|
-
path:
|
|
11428
|
+
path: z8.string().min(1).describe("Path relative to wiki/, for example sources/example.md")
|
|
10640
11429
|
}
|
|
10641
11430
|
},
|
|
10642
11431
|
async ({ path: relativePath }) => {
|
|
@@ -10652,7 +11441,7 @@ async function createMcpServer(rootDir) {
|
|
|
10652
11441
|
{
|
|
10653
11442
|
description: "List source manifests in the current workspace.",
|
|
10654
11443
|
inputSchema: {
|
|
10655
|
-
limit:
|
|
11444
|
+
limit: z8.number().int().min(1).max(100).optional().describe("Maximum number of manifests to return")
|
|
10656
11445
|
}
|
|
10657
11446
|
},
|
|
10658
11447
|
async ({ limit }) => {
|
|
@@ -10665,9 +11454,9 @@ async function createMcpServer(rootDir) {
|
|
|
10665
11454
|
{
|
|
10666
11455
|
description: "Traverse the local graph from search seeds without calling a model provider.",
|
|
10667
11456
|
inputSchema: {
|
|
10668
|
-
question:
|
|
10669
|
-
traversal:
|
|
10670
|
-
budget:
|
|
11457
|
+
question: z8.string().min(1).describe("Question or graph search seed"),
|
|
11458
|
+
traversal: z8.enum(["bfs", "dfs"]).optional().describe("Traversal strategy"),
|
|
11459
|
+
budget: z8.number().int().min(3).max(50).optional().describe("Maximum nodes to summarize")
|
|
10671
11460
|
}
|
|
10672
11461
|
},
|
|
10673
11462
|
async ({ question, traversal, budget }) => {
|
|
@@ -10683,7 +11472,7 @@ async function createMcpServer(rootDir) {
|
|
|
10683
11472
|
{
|
|
10684
11473
|
description: "Explain a graph node, its page, community, and neighbors.",
|
|
10685
11474
|
inputSchema: {
|
|
10686
|
-
target:
|
|
11475
|
+
target: z8.string().min(1).describe("Node or page label/id")
|
|
10687
11476
|
}
|
|
10688
11477
|
},
|
|
10689
11478
|
async ({ target }) => {
|
|
@@ -10695,7 +11484,7 @@ async function createMcpServer(rootDir) {
|
|
|
10695
11484
|
{
|
|
10696
11485
|
description: "Return the neighbors of a graph node or page target.",
|
|
10697
11486
|
inputSchema: {
|
|
10698
|
-
target:
|
|
11487
|
+
target: z8.string().min(1).describe("Node or page label/id")
|
|
10699
11488
|
}
|
|
10700
11489
|
},
|
|
10701
11490
|
async ({ target }) => {
|
|
@@ -10708,8 +11497,8 @@ async function createMcpServer(rootDir) {
|
|
|
10708
11497
|
{
|
|
10709
11498
|
description: "Find the shortest graph path between two targets.",
|
|
10710
11499
|
inputSchema: {
|
|
10711
|
-
from:
|
|
10712
|
-
to:
|
|
11500
|
+
from: z8.string().min(1).describe("Start node/page label or id"),
|
|
11501
|
+
to: z8.string().min(1).describe("End node/page label or id")
|
|
10713
11502
|
}
|
|
10714
11503
|
},
|
|
10715
11504
|
async ({ from, to }) => {
|
|
@@ -10721,7 +11510,7 @@ async function createMcpServer(rootDir) {
|
|
|
10721
11510
|
{
|
|
10722
11511
|
description: "List the highest-connectivity graph nodes.",
|
|
10723
11512
|
inputSchema: {
|
|
10724
|
-
limit:
|
|
11513
|
+
limit: z8.number().int().min(1).max(25).optional().describe("Maximum nodes to return")
|
|
10725
11514
|
}
|
|
10726
11515
|
},
|
|
10727
11516
|
async ({ limit }) => {
|
|
@@ -10733,9 +11522,9 @@ async function createMcpServer(rootDir) {
|
|
|
10733
11522
|
{
|
|
10734
11523
|
description: "Ask a question against the compiled vault and optionally save the answer.",
|
|
10735
11524
|
inputSchema: {
|
|
10736
|
-
question:
|
|
10737
|
-
save:
|
|
10738
|
-
format:
|
|
11525
|
+
question: z8.string().min(1).describe("Question to ask the vault"),
|
|
11526
|
+
save: z8.boolean().optional().describe("Persist the answer to wiki/outputs"),
|
|
11527
|
+
format: z8.enum(["markdown", "report", "slides", "chart", "image"]).optional().describe("Output format")
|
|
10739
11528
|
}
|
|
10740
11529
|
},
|
|
10741
11530
|
async ({ question, save, format }) => {
|
|
@@ -10752,7 +11541,7 @@ async function createMcpServer(rootDir) {
|
|
|
10752
11541
|
{
|
|
10753
11542
|
description: "Ingest a local file path or URL into the SwarmVault workspace.",
|
|
10754
11543
|
inputSchema: {
|
|
10755
|
-
input:
|
|
11544
|
+
input: z8.string().min(1).describe("Local path or URL to ingest")
|
|
10756
11545
|
}
|
|
10757
11546
|
},
|
|
10758
11547
|
async ({ input }) => {
|
|
@@ -10765,7 +11554,7 @@ async function createMcpServer(rootDir) {
|
|
|
10765
11554
|
{
|
|
10766
11555
|
description: "Compile source manifests into wiki pages, graph data, and search index.",
|
|
10767
11556
|
inputSchema: {
|
|
10768
|
-
approve:
|
|
11557
|
+
approve: z8.boolean().optional().describe("Stage a review bundle without applying active page changes")
|
|
10769
11558
|
}
|
|
10770
11559
|
},
|
|
10771
11560
|
async ({ approve }) => {
|
|
@@ -10849,7 +11638,7 @@ async function createMcpServer(rootDir) {
|
|
|
10849
11638
|
},
|
|
10850
11639
|
async () => {
|
|
10851
11640
|
const { paths } = await loadVaultConfig(rootDir);
|
|
10852
|
-
const files = (await listFilesRecursive(paths.sessionsDir)).filter((filePath) => filePath.endsWith(".md")).map((filePath) => toPosix(
|
|
11641
|
+
const files = (await listFilesRecursive(paths.sessionsDir)).filter((filePath) => filePath.endsWith(".md")).map((filePath) => toPosix(path19.relative(paths.sessionsDir, filePath))).sort();
|
|
10853
11642
|
return asTextResource("swarmvault://sessions", JSON.stringify(files, null, 2));
|
|
10854
11643
|
}
|
|
10855
11644
|
);
|
|
@@ -10882,8 +11671,8 @@ async function createMcpServer(rootDir) {
|
|
|
10882
11671
|
return asTextResource(`swarmvault://pages/${encodedPath}`, `Page not found: ${relativePath}`);
|
|
10883
11672
|
}
|
|
10884
11673
|
const { paths } = await loadVaultConfig(rootDir);
|
|
10885
|
-
const absolutePath =
|
|
10886
|
-
return asTextResource(`swarmvault://pages/${encodedPath}`, await
|
|
11674
|
+
const absolutePath = path19.resolve(paths.wikiDir, relativePath);
|
|
11675
|
+
return asTextResource(`swarmvault://pages/${encodedPath}`, await fs16.readFile(absolutePath, "utf8"));
|
|
10887
11676
|
}
|
|
10888
11677
|
);
|
|
10889
11678
|
server.registerResource(
|
|
@@ -10891,11 +11680,11 @@ async function createMcpServer(rootDir) {
|
|
|
10891
11680
|
new ResourceTemplate("swarmvault://sessions/{path}", {
|
|
10892
11681
|
list: async () => {
|
|
10893
11682
|
const { paths } = await loadVaultConfig(rootDir);
|
|
10894
|
-
const files = (await listFilesRecursive(paths.sessionsDir)).filter((filePath) => filePath.endsWith(".md")).map((filePath) => toPosix(
|
|
11683
|
+
const files = (await listFilesRecursive(paths.sessionsDir)).filter((filePath) => filePath.endsWith(".md")).map((filePath) => toPosix(path19.relative(paths.sessionsDir, filePath))).sort();
|
|
10895
11684
|
return {
|
|
10896
11685
|
resources: files.map((relativePath) => ({
|
|
10897
11686
|
uri: `swarmvault://sessions/${encodeURIComponent(relativePath)}`,
|
|
10898
|
-
name:
|
|
11687
|
+
name: path19.basename(relativePath, ".md"),
|
|
10899
11688
|
title: relativePath,
|
|
10900
11689
|
description: "SwarmVault session artifact",
|
|
10901
11690
|
mimeType: "text/markdown"
|
|
@@ -10912,11 +11701,11 @@ async function createMcpServer(rootDir) {
|
|
|
10912
11701
|
const { paths } = await loadVaultConfig(rootDir);
|
|
10913
11702
|
const encodedPath = typeof variables.path === "string" ? variables.path : "";
|
|
10914
11703
|
const relativePath = decodeURIComponent(encodedPath);
|
|
10915
|
-
const absolutePath =
|
|
11704
|
+
const absolutePath = path19.resolve(paths.sessionsDir, relativePath);
|
|
10916
11705
|
if (!absolutePath.startsWith(paths.sessionsDir) || !await fileExists(absolutePath)) {
|
|
10917
11706
|
return asTextResource(`swarmvault://sessions/${encodedPath}`, `Session not found: ${relativePath}`);
|
|
10918
11707
|
}
|
|
10919
|
-
return asTextResource(`swarmvault://sessions/${encodedPath}`, await
|
|
11708
|
+
return asTextResource(`swarmvault://sessions/${encodedPath}`, await fs16.readFile(absolutePath, "utf8"));
|
|
10920
11709
|
}
|
|
10921
11710
|
);
|
|
10922
11711
|
return server;
|
|
@@ -10964,13 +11753,13 @@ function asTextResource(uri, text) {
|
|
|
10964
11753
|
}
|
|
10965
11754
|
|
|
10966
11755
|
// src/schedule.ts
|
|
10967
|
-
import
|
|
10968
|
-
import
|
|
11756
|
+
import fs17 from "fs/promises";
|
|
11757
|
+
import path20 from "path";
|
|
10969
11758
|
function scheduleStatePath(schedulesDir, jobId) {
|
|
10970
|
-
return
|
|
11759
|
+
return path20.join(schedulesDir, `${encodeURIComponent(jobId)}.json`);
|
|
10971
11760
|
}
|
|
10972
11761
|
function scheduleLockPath(schedulesDir, jobId) {
|
|
10973
|
-
return
|
|
11762
|
+
return path20.join(schedulesDir, `${encodeURIComponent(jobId)}.lock`);
|
|
10974
11763
|
}
|
|
10975
11764
|
function parseEveryDuration(value) {
|
|
10976
11765
|
const match = value.trim().match(/^(\d+)(m|h|d)$/i);
|
|
@@ -11073,13 +11862,13 @@ async function acquireJobLease(rootDir, jobId) {
|
|
|
11073
11862
|
const { paths } = await loadVaultConfig(rootDir);
|
|
11074
11863
|
const leasePath = scheduleLockPath(paths.schedulesDir, jobId);
|
|
11075
11864
|
await ensureDir(paths.schedulesDir);
|
|
11076
|
-
const handle = await
|
|
11865
|
+
const handle = await fs17.open(leasePath, "wx");
|
|
11077
11866
|
await handle.writeFile(`${process.pid}
|
|
11078
11867
|
${(/* @__PURE__ */ new Date()).toISOString()}
|
|
11079
11868
|
`);
|
|
11080
11869
|
await handle.close();
|
|
11081
11870
|
return async () => {
|
|
11082
|
-
await
|
|
11871
|
+
await fs17.rm(leasePath, { force: true });
|
|
11083
11872
|
};
|
|
11084
11873
|
}
|
|
11085
11874
|
async function listSchedules(rootDir) {
|
|
@@ -11227,15 +12016,15 @@ async function serveSchedules(rootDir, pollMs = 3e4) {
|
|
|
11227
12016
|
|
|
11228
12017
|
// src/viewer.ts
|
|
11229
12018
|
import { execFile } from "child_process";
|
|
11230
|
-
import
|
|
12019
|
+
import fs18 from "fs/promises";
|
|
11231
12020
|
import http from "http";
|
|
11232
|
-
import
|
|
12021
|
+
import path22 from "path";
|
|
11233
12022
|
import { promisify } from "util";
|
|
11234
|
-
import
|
|
12023
|
+
import matter10 from "gray-matter";
|
|
11235
12024
|
import mime2 from "mime-types";
|
|
11236
12025
|
|
|
11237
12026
|
// src/watch.ts
|
|
11238
|
-
import
|
|
12027
|
+
import path21 from "path";
|
|
11239
12028
|
import process2 from "process";
|
|
11240
12029
|
import chokidar from "chokidar";
|
|
11241
12030
|
var MAX_BACKOFF_MS = 3e4;
|
|
@@ -11243,15 +12032,15 @@ var BACKOFF_THRESHOLD = 3;
|
|
|
11243
12032
|
var CRITICAL_THRESHOLD = 10;
|
|
11244
12033
|
var REPO_WATCH_IGNORES = /* @__PURE__ */ new Set([".git", "node_modules", "dist", "build", ".next", "coverage", ".venv", "vendor", "target"]);
|
|
11245
12034
|
function withinRoot2(rootPath, targetPath) {
|
|
11246
|
-
const relative =
|
|
11247
|
-
return relative === "" || !relative.startsWith("..") && !
|
|
12035
|
+
const relative = path21.relative(rootPath, targetPath);
|
|
12036
|
+
return relative === "" || !relative.startsWith("..") && !path21.isAbsolute(relative);
|
|
11248
12037
|
}
|
|
11249
12038
|
function hasIgnoredRepoSegment(baseDir, targetPath) {
|
|
11250
|
-
const relativePath =
|
|
12039
|
+
const relativePath = path21.relative(baseDir, targetPath);
|
|
11251
12040
|
if (!relativePath || relativePath.startsWith("..")) {
|
|
11252
12041
|
return false;
|
|
11253
12042
|
}
|
|
11254
|
-
return relativePath.split(
|
|
12043
|
+
return relativePath.split(path21.sep).some((segment) => REPO_WATCH_IGNORES.has(segment));
|
|
11255
12044
|
}
|
|
11256
12045
|
function workspaceIgnoreRoots(rootDir, paths) {
|
|
11257
12046
|
return [
|
|
@@ -11260,16 +12049,16 @@ function workspaceIgnoreRoots(rootDir, paths) {
|
|
|
11260
12049
|
paths.stateDir,
|
|
11261
12050
|
paths.agentDir,
|
|
11262
12051
|
paths.inboxDir,
|
|
11263
|
-
|
|
11264
|
-
|
|
11265
|
-
|
|
11266
|
-
].map((candidate) =>
|
|
12052
|
+
path21.join(rootDir, ".claude"),
|
|
12053
|
+
path21.join(rootDir, ".cursor"),
|
|
12054
|
+
path21.join(rootDir, ".obsidian")
|
|
12055
|
+
].map((candidate) => path21.resolve(candidate));
|
|
11267
12056
|
}
|
|
11268
12057
|
async function resolveWatchTargets(rootDir, paths, options) {
|
|
11269
|
-
const targets = /* @__PURE__ */ new Set([
|
|
12058
|
+
const targets = /* @__PURE__ */ new Set([path21.resolve(paths.inboxDir)]);
|
|
11270
12059
|
if (options.repo) {
|
|
11271
12060
|
for (const repoRoot of await listTrackedRepoRoots(rootDir)) {
|
|
11272
|
-
targets.add(
|
|
12061
|
+
targets.add(path21.resolve(repoRoot));
|
|
11273
12062
|
}
|
|
11274
12063
|
}
|
|
11275
12064
|
return [...targets].sort((left, right) => left.localeCompare(right));
|
|
@@ -11399,7 +12188,7 @@ async function watchVault(rootDir, options = {}) {
|
|
|
11399
12188
|
const { paths } = await initWorkspace(rootDir);
|
|
11400
12189
|
const baseDebounceMs = options.debounceMs ?? 900;
|
|
11401
12190
|
const ignoredRoots = workspaceIgnoreRoots(rootDir, paths);
|
|
11402
|
-
const inboxWatchRoot =
|
|
12191
|
+
const inboxWatchRoot = path21.resolve(paths.inboxDir);
|
|
11403
12192
|
let watchTargets = await resolveWatchTargets(rootDir, paths, options);
|
|
11404
12193
|
let timer;
|
|
11405
12194
|
let running = false;
|
|
@@ -11413,7 +12202,7 @@ async function watchVault(rootDir, options = {}) {
|
|
|
11413
12202
|
usePolling: true,
|
|
11414
12203
|
interval: 100,
|
|
11415
12204
|
ignored: (targetPath) => {
|
|
11416
|
-
const absolutePath =
|
|
12205
|
+
const absolutePath = path21.resolve(targetPath);
|
|
11417
12206
|
const primaryTarget = watchTargets.filter((watchTarget) => withinRoot2(watchTarget, absolutePath)).sort((left, right) => right.length - left.length)[0] ?? null;
|
|
11418
12207
|
if (!primaryTarget) {
|
|
11419
12208
|
return false;
|
|
@@ -11585,8 +12374,8 @@ async function watchVault(rootDir, options = {}) {
|
|
|
11585
12374
|
}
|
|
11586
12375
|
};
|
|
11587
12376
|
const reasonForPath = (targetPath) => {
|
|
11588
|
-
const baseDir = watchTargets.filter((watchTarget) => withinRoot2(watchTarget,
|
|
11589
|
-
return
|
|
12377
|
+
const baseDir = watchTargets.filter((watchTarget) => withinRoot2(watchTarget, path21.resolve(targetPath))).sort((left, right) => right.length - left.length)[0] ?? paths.inboxDir;
|
|
12378
|
+
return path21.relative(baseDir, targetPath) || ".";
|
|
11590
12379
|
};
|
|
11591
12380
|
watcher.on("add", (filePath) => schedule(`add:${reasonForPath(filePath)}`)).on("change", (filePath) => schedule(`change:${reasonForPath(filePath)}`)).on("unlink", (filePath) => schedule(`unlink:${reasonForPath(filePath)}`)).on("addDir", (dirPath) => schedule(`addDir:${reasonForPath(dirPath)}`)).on("unlinkDir", (dirPath) => schedule(`unlinkDir:${reasonForPath(dirPath)}`)).on("error", (caught) => schedule(`error:${caught instanceof Error ? caught.message : String(caught)}`));
|
|
11592
12381
|
await new Promise((resolve, reject) => {
|
|
@@ -11627,15 +12416,15 @@ async function getWatchStatus(rootDir) {
|
|
|
11627
12416
|
var execFileAsync = promisify(execFile);
|
|
11628
12417
|
async function readViewerPage(rootDir, relativePath) {
|
|
11629
12418
|
const { paths } = await loadVaultConfig(rootDir);
|
|
11630
|
-
const absolutePath =
|
|
12419
|
+
const absolutePath = path22.resolve(paths.wikiDir, relativePath);
|
|
11631
12420
|
if (!absolutePath.startsWith(paths.wikiDir) || !await fileExists(absolutePath)) {
|
|
11632
12421
|
return null;
|
|
11633
12422
|
}
|
|
11634
|
-
const raw = await
|
|
11635
|
-
const parsed =
|
|
12423
|
+
const raw = await fs18.readFile(absolutePath, "utf8");
|
|
12424
|
+
const parsed = matter10(raw);
|
|
11636
12425
|
return {
|
|
11637
12426
|
path: relativePath,
|
|
11638
|
-
title: typeof parsed.data.title === "string" ? parsed.data.title :
|
|
12427
|
+
title: typeof parsed.data.title === "string" ? parsed.data.title : path22.basename(relativePath, path22.extname(relativePath)),
|
|
11639
12428
|
frontmatter: parsed.data,
|
|
11640
12429
|
content: parsed.content,
|
|
11641
12430
|
assets: normalizeOutputAssets(parsed.data.output_assets)
|
|
@@ -11643,12 +12432,12 @@ async function readViewerPage(rootDir, relativePath) {
|
|
|
11643
12432
|
}
|
|
11644
12433
|
async function readViewerAsset(rootDir, relativePath) {
|
|
11645
12434
|
const { paths } = await loadVaultConfig(rootDir);
|
|
11646
|
-
const absolutePath =
|
|
12435
|
+
const absolutePath = path22.resolve(paths.wikiDir, relativePath);
|
|
11647
12436
|
if (!absolutePath.startsWith(paths.wikiDir) || !await fileExists(absolutePath)) {
|
|
11648
12437
|
return null;
|
|
11649
12438
|
}
|
|
11650
12439
|
return {
|
|
11651
|
-
buffer: await
|
|
12440
|
+
buffer: await fs18.readFile(absolutePath),
|
|
11652
12441
|
mimeType: mime2.lookup(absolutePath) || "application/octet-stream"
|
|
11653
12442
|
};
|
|
11654
12443
|
}
|
|
@@ -11671,12 +12460,12 @@ async function readJsonBody(request) {
|
|
|
11671
12460
|
return JSON.parse(raw);
|
|
11672
12461
|
}
|
|
11673
12462
|
async function ensureViewerDist(viewerDistDir) {
|
|
11674
|
-
const indexPath =
|
|
12463
|
+
const indexPath = path22.join(viewerDistDir, "index.html");
|
|
11675
12464
|
if (await fileExists(indexPath)) {
|
|
11676
12465
|
return;
|
|
11677
12466
|
}
|
|
11678
|
-
const viewerProjectDir =
|
|
11679
|
-
if (await fileExists(
|
|
12467
|
+
const viewerProjectDir = path22.dirname(viewerDistDir);
|
|
12468
|
+
if (await fileExists(path22.join(viewerProjectDir, "package.json"))) {
|
|
11680
12469
|
await execFileAsync("pnpm", ["build"], { cwd: viewerProjectDir });
|
|
11681
12470
|
}
|
|
11682
12471
|
}
|
|
@@ -11693,7 +12482,7 @@ async function startGraphServer(rootDir, port) {
|
|
|
11693
12482
|
return;
|
|
11694
12483
|
}
|
|
11695
12484
|
response.writeHead(200, { "content-type": "application/json" });
|
|
11696
|
-
response.end(await
|
|
12485
|
+
response.end(await fs18.readFile(paths.graphPath, "utf8"));
|
|
11697
12486
|
return;
|
|
11698
12487
|
}
|
|
11699
12488
|
if (url.pathname === "/api/graph/query") {
|
|
@@ -11735,16 +12524,29 @@ async function startGraphServer(rootDir, port) {
|
|
|
11735
12524
|
const kind = url.searchParams.get("kind") ?? "all";
|
|
11736
12525
|
const status = url.searchParams.get("status") ?? "all";
|
|
11737
12526
|
const project = url.searchParams.get("project") ?? "all";
|
|
12527
|
+
const sourceType = url.searchParams.get("sourceType") ?? "all";
|
|
11738
12528
|
const results = searchPages(paths.searchDbPath, query, {
|
|
11739
12529
|
limit: Number.isFinite(limit) ? limit : 10,
|
|
11740
12530
|
kind,
|
|
11741
12531
|
status,
|
|
11742
|
-
project
|
|
12532
|
+
project,
|
|
12533
|
+
sourceType
|
|
11743
12534
|
});
|
|
11744
12535
|
response.writeHead(200, { "content-type": "application/json" });
|
|
11745
12536
|
response.end(JSON.stringify(results));
|
|
11746
12537
|
return;
|
|
11747
12538
|
}
|
|
12539
|
+
if (url.pathname === "/api/graph-report") {
|
|
12540
|
+
const reportPath = path22.join(paths.wikiDir, "graph", "report.json");
|
|
12541
|
+
if (!await fileExists(reportPath)) {
|
|
12542
|
+
response.writeHead(404, { "content-type": "application/json" });
|
|
12543
|
+
response.end(JSON.stringify({ error: "Graph report artifact not found. Run `swarmvault compile` first." }));
|
|
12544
|
+
return;
|
|
12545
|
+
}
|
|
12546
|
+
response.writeHead(200, { "content-type": "application/json" });
|
|
12547
|
+
response.end(await fs18.readFile(reportPath, "utf8"));
|
|
12548
|
+
return;
|
|
12549
|
+
}
|
|
11748
12550
|
if (url.pathname === "/api/watch-status") {
|
|
11749
12551
|
response.writeHead(200, { "content-type": "application/json" });
|
|
11750
12552
|
response.end(JSON.stringify(await getWatchStatus(rootDir)));
|
|
@@ -11825,8 +12627,8 @@ async function startGraphServer(rootDir, port) {
|
|
|
11825
12627
|
return;
|
|
11826
12628
|
}
|
|
11827
12629
|
const relativePath = url.pathname === "/" ? "index.html" : url.pathname.slice(1);
|
|
11828
|
-
const target =
|
|
11829
|
-
const fallback =
|
|
12630
|
+
const target = path22.join(paths.viewerDistDir, relativePath);
|
|
12631
|
+
const fallback = path22.join(paths.viewerDistDir, "index.html");
|
|
11830
12632
|
const filePath = await fileExists(target) ? target : fallback;
|
|
11831
12633
|
if (!await fileExists(filePath)) {
|
|
11832
12634
|
response.writeHead(503, { "content-type": "text/plain" });
|
|
@@ -11834,7 +12636,7 @@ async function startGraphServer(rootDir, port) {
|
|
|
11834
12636
|
return;
|
|
11835
12637
|
}
|
|
11836
12638
|
response.writeHead(200, { "content-type": mime2.lookup(filePath) || "text/plain" });
|
|
11837
|
-
response.end(await
|
|
12639
|
+
response.end(await fs18.readFile(filePath));
|
|
11838
12640
|
});
|
|
11839
12641
|
await new Promise((resolve) => {
|
|
11840
12642
|
server.listen(effectivePort, resolve);
|
|
@@ -11861,7 +12663,7 @@ async function exportGraphHtml(rootDir, outputPath) {
|
|
|
11861
12663
|
throw new Error("Graph artifact not found. Run `swarmvault compile` first.");
|
|
11862
12664
|
}
|
|
11863
12665
|
await ensureViewerDist(paths.viewerDistDir);
|
|
11864
|
-
const indexPath =
|
|
12666
|
+
const indexPath = path22.join(paths.viewerDistDir, "index.html");
|
|
11865
12667
|
if (!await fileExists(indexPath)) {
|
|
11866
12668
|
throw new Error("Viewer build not found. Run `pnpm build` first.");
|
|
11867
12669
|
}
|
|
@@ -11874,6 +12676,7 @@ async function exportGraphHtml(rootDir, outputPath) {
|
|
|
11874
12676
|
title: loaded.title,
|
|
11875
12677
|
kind: page.kind,
|
|
11876
12678
|
status: page.status,
|
|
12679
|
+
sourceType: page.sourceType,
|
|
11877
12680
|
projectIds: page.projectIds,
|
|
11878
12681
|
content: loaded.content,
|
|
11879
12682
|
assets: await Promise.all(
|
|
@@ -11885,17 +12688,18 @@ async function exportGraphHtml(rootDir, outputPath) {
|
|
|
11885
12688
|
} : null;
|
|
11886
12689
|
})
|
|
11887
12690
|
);
|
|
11888
|
-
const rawHtml = await
|
|
12691
|
+
const rawHtml = await fs18.readFile(indexPath, "utf8");
|
|
11889
12692
|
const scriptMatch = rawHtml.match(/<script type="module" crossorigin src="([^"]+)"><\/script>/);
|
|
11890
12693
|
const styleMatch = rawHtml.match(/<link rel="stylesheet" crossorigin href="([^"]+)">/);
|
|
11891
|
-
const scriptPath = scriptMatch?.[1] ?
|
|
11892
|
-
const stylePath = styleMatch?.[1] ?
|
|
12694
|
+
const scriptPath = scriptMatch?.[1] ? path22.join(paths.viewerDistDir, scriptMatch[1].replace(/^\//, "")) : null;
|
|
12695
|
+
const stylePath = styleMatch?.[1] ? path22.join(paths.viewerDistDir, styleMatch[1].replace(/^\//, "")) : null;
|
|
11893
12696
|
if (!scriptPath || !await fileExists(scriptPath)) {
|
|
11894
12697
|
throw new Error("Viewer script bundle not found. Run `pnpm build` first.");
|
|
11895
12698
|
}
|
|
11896
|
-
const script = await
|
|
11897
|
-
const style = stylePath && await fileExists(stylePath) ? await
|
|
11898
|
-
const
|
|
12699
|
+
const script = await fs18.readFile(scriptPath, "utf8");
|
|
12700
|
+
const style = stylePath && await fileExists(stylePath) ? await fs18.readFile(stylePath, "utf8") : "";
|
|
12701
|
+
const report = await readJsonFile(path22.join(paths.wikiDir, "graph", "report.json"));
|
|
12702
|
+
const embeddedData = JSON.stringify({ graph, pages: pages.filter(Boolean), report }, null, 2).replace(/</g, "\\u003c");
|
|
11899
12703
|
const html = [
|
|
11900
12704
|
"<!doctype html>",
|
|
11901
12705
|
'<html lang="en">',
|
|
@@ -11913,9 +12717,9 @@ async function exportGraphHtml(rootDir, outputPath) {
|
|
|
11913
12717
|
"</html>",
|
|
11914
12718
|
""
|
|
11915
12719
|
].filter(Boolean).join("\n");
|
|
11916
|
-
await
|
|
11917
|
-
await
|
|
11918
|
-
return
|
|
12720
|
+
await fs18.mkdir(path22.dirname(outputPath), { recursive: true });
|
|
12721
|
+
await fs18.writeFile(outputPath, html, "utf8");
|
|
12722
|
+
return path22.resolve(outputPath);
|
|
11919
12723
|
}
|
|
11920
12724
|
export {
|
|
11921
12725
|
acceptApproval,
|