@swarmvaultai/engine 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/dist/chunk-6UPHDGEB.js +1073 -0
- package/dist/index.d.ts +89 -2
- package/dist/index.js +584 -147
- package/dist/registry-6KZMA3XM.js +12 -0
- package/dist/viewer/assets/index-f8JPYMw_.js +330 -0
- package/dist/viewer/index.html +1 -1
- package/dist/viewer/lib.d.ts +52 -1
- package/dist/viewer/lib.js +23 -4
- package/package.json +1 -1
- package/dist/viewer/assets/index-DEETVhXx.js +0 -330
package/dist/index.js
CHANGED
|
@@ -21,7 +21,7 @@ import {
|
|
|
21
21
|
uniqueBy,
|
|
22
22
|
writeFileIfChanged,
|
|
23
23
|
writeJsonFile
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-6UPHDGEB.js";
|
|
25
25
|
|
|
26
26
|
// src/agents.ts
|
|
27
27
|
import fs from "fs/promises";
|
|
@@ -596,6 +596,7 @@ async function uninstallGitHooks(rootDir) {
|
|
|
596
596
|
import fs9 from "fs/promises";
|
|
597
597
|
import path9 from "path";
|
|
598
598
|
import { Readability } from "@mozilla/readability";
|
|
599
|
+
import matter3 from "gray-matter";
|
|
599
600
|
import ignore from "ignore";
|
|
600
601
|
import { JSDOM } from "jsdom";
|
|
601
602
|
import mime from "mime-types";
|
|
@@ -3204,6 +3205,9 @@ async function markPagesStaleForSources(rootDir, sourceIds) {
|
|
|
3204
3205
|
var DEFAULT_MAX_ASSET_SIZE = 10 * 1024 * 1024;
|
|
3205
3206
|
var DEFAULT_MAX_DIRECTORY_FILES = 5e3;
|
|
3206
3207
|
var BUILT_IN_REPO_IGNORES = /* @__PURE__ */ new Set([".git", "node_modules", "dist", "build", ".next", "coverage", ".venv", "vendor", "target"]);
|
|
3208
|
+
/**
 * Remove falsy entries and duplicates from a list, keeping first-seen order.
 *
 * @param {Array<string | null | undefined>} values - candidate values.
 * @returns {string[]} the distinct truthy values in their original order.
 */
function uniqueStrings(values) {
  const seen = new Set();
  for (const value of values) {
    if (value) {
      seen.add(value);
    }
  }
  return Array.from(seen);
}
|
|
3207
3211
|
function inferKind(mimeType, filePath) {
|
|
3208
3212
|
if (inferCodeLanguage(filePath, mimeType)) {
|
|
3209
3213
|
return "code";
|
|
@@ -3321,6 +3325,22 @@ function arxivIdFromInput(input) {
|
|
|
3321
3325
|
return null;
|
|
3322
3326
|
}
|
|
3323
3327
|
}
|
|
3328
|
+
/**
 * Extract a DOI from user input.
 *
 * Accepted forms:
 *   - a bare DOI such as `10.1000/xyz123`, optionally prefixed with `doi:`;
 *   - a resolver URL on `doi.org`, `dx.doi.org` or `www.doi.org`, whose
 *     (percent-decoded) path is the DOI.
 *
 * @param {unknown} input - raw user input.
 * @returns {string | null} the DOI, or `null` when the input is not a DOI.
 */
function doiFromInput(input) {
  if (typeof input !== "string") {
    return null;
  }
  const doiPattern = /^10\.\S+\/\S+$/i;
  const resolverHosts = new Set(["doi.org", "dx.doi.org", "www.doi.org"]);
  // Strip the conventional "doi:" label so `doi:10.1000/xyz` is treated as a bare DOI.
  const trimmed = input.trim().replace(/^doi:\s*/i, "");
  if (doiPattern.test(trimmed)) {
    // The pattern already rejects whitespace, so no further cleanup is needed.
    return trimmed;
  }
  try {
    const url = new URL(trimmed);
    if (resolverHosts.has(url.hostname)) {
      const doi = decodeURIComponent(url.pathname.replace(/^\/+/, ""));
      return doiPattern.test(doi) ? doi : null;
    }
  } catch {
    // Not a parseable URL, or the path has a malformed percent-escape.
    return null;
  }
  return null;
}
|
|
3324
3344
|
function isTweetUrl(input) {
|
|
3325
3345
|
try {
|
|
3326
3346
|
const url = new URL(input);
|
|
@@ -3330,26 +3350,25 @@ function isTweetUrl(input) {
|
|
|
3330
3350
|
}
|
|
3331
3351
|
}
|
|
3332
3352
|
function markdownFrontmatter(value) {
|
|
3333
|
-
const
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
}
|
|
3340
|
-
lines.push("---", "");
|
|
3341
|
-
return lines;
|
|
3353
|
+
const normalized = Object.fromEntries(
|
|
3354
|
+
Object.entries(value).filter(
|
|
3355
|
+
([, rawValue]) => Array.isArray(rawValue) ? rawValue.length > 0 : Boolean(typeof rawValue === "string" ? rawValue.trim() : rawValue)
|
|
3356
|
+
)
|
|
3357
|
+
);
|
|
3358
|
+
return matter3.stringify("", normalized).trimEnd().split("\n").concat([""]);
|
|
3342
3359
|
}
|
|
3343
3360
|
/**
 * Wrap captured markdown in the "prepared input" record used for URL-origin
 * markdown sources.
 *
 * The markdown string is stored both as UTF-8 payload bytes and as the
 * extracted text; the URL is passed through `normalizeOriginUrl`.
 *
 * @param {object} input - `{ title, url, markdown, sourceType, attachments, logDetails }`.
 * @returns {object} the prepared input record.
 */
function prepareCapturedMarkdownInput(input) {
  const { title, sourceType, url, markdown, attachments, logDetails } = input;
  const prepared = {
    title,
    originType: "url",
    sourceKind: "markdown",
    sourceType,
    url: normalizeOriginUrl(url),
    mimeType: "text/markdown",
    storedExtension: ".md",
    payloadBytes: Buffer.from(markdown, "utf8"),
    extractedText: markdown,
    attachments,
    logDetails
  };
  return prepared;
}
|
|
@@ -3360,6 +3379,17 @@ async function fetchText(url) {
|
|
|
3360
3379
|
}
|
|
3361
3380
|
return response.text();
|
|
3362
3381
|
}
|
|
3382
|
+
/**
 * Fetch a URL and return its body together with where it finally resolved.
 *
 * @param {string} url - the URL to fetch.
 * @returns {Promise<{ text: string, finalUrl: string, contentType: string }>}
 *   the response body, the normalized response URL (falling back to `url`
 *   when the response does not report one), and the lower-cased media type
 *   without parameters (`"text/html"` when the header is missing or empty).
 * @throws {Error} when the response status is not OK.
 */
async function fetchResolvedText(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
  }
  // Media types are case-insensitive; lower-case so callers can compare with
  // plain substring checks such as `.includes("html")`.
  const mediaType = response.headers.get("content-type")?.split(";")[0]?.trim().toLowerCase();
  return {
    text: await response.text(),
    finalUrl: normalizeOriginUrl(response.url || url),
    contentType: mediaType || "text/html"
  };
}
|
|
3363
3393
|
function domTextFromHtml(html, baseUrl) {
|
|
3364
3394
|
const dom = new JSDOM(`<body>${html}</body>`, { url: baseUrl });
|
|
3365
3395
|
return normalizeWhitespace(dom.window.document.body.textContent ?? "");
|
|
@@ -3379,11 +3409,16 @@ async function captureArxivMarkdown(input, options) {
|
|
|
3379
3409
|
const authors = [...document.querySelectorAll('meta[name="citation_author"]')].map((node) => node.getAttribute("content")?.trim()).filter((value) => Boolean(value));
|
|
3380
3410
|
const authorsText = authors.join(", ") || stripLeadingLabel(document.querySelector(".authors")?.textContent?.trim() ?? "", "Authors:");
|
|
3381
3411
|
const abstract = stripLeadingLabel(document.querySelector("blockquote.abstract")?.textContent?.trim() ?? "", "Abstract:");
|
|
3412
|
+
const categories = [...document.querySelectorAll(".subheader .primary-subject, .metatable .tablecell.subjects")].flatMap((node) => (node.textContent ?? "").split(/;/g)).map((value) => value.trim()).filter(Boolean);
|
|
3382
3413
|
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
3383
3414
|
const markdown = [
|
|
3384
3415
|
...markdownFrontmatter({
|
|
3385
|
-
|
|
3416
|
+
source_type: "arxiv",
|
|
3386
3417
|
source_url: normalizedUrl,
|
|
3418
|
+
canonical_url: normalizedUrl,
|
|
3419
|
+
title,
|
|
3420
|
+
authors,
|
|
3421
|
+
tags: uniqueStrings(categories),
|
|
3387
3422
|
arxiv_id: arxivId,
|
|
3388
3423
|
author: options.author,
|
|
3389
3424
|
contributor: options.contributor,
|
|
@@ -3423,8 +3458,11 @@ async function captureTweetMarkdown(input, options) {
|
|
|
3423
3458
|
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
3424
3459
|
const markdown = [
|
|
3425
3460
|
...markdownFrontmatter({
|
|
3426
|
-
|
|
3461
|
+
source_type: "tweet",
|
|
3427
3462
|
source_url: normalizedUrl,
|
|
3463
|
+
canonical_url: canonicalUrl,
|
|
3464
|
+
title,
|
|
3465
|
+
authors: postAuthor ? [postAuthor] : void 0,
|
|
3428
3466
|
author: options.author,
|
|
3429
3467
|
contributor: options.contributor,
|
|
3430
3468
|
captured_at: capturedAt
|
|
@@ -3446,6 +3484,101 @@ async function captureTweetMarkdown(input, options) {
|
|
|
3446
3484
|
].join("\n");
|
|
3447
3485
|
return { title, normalizedUrl, markdown };
|
|
3448
3486
|
}
|
|
3487
|
+
/**
 * Return the trimmed `content` attribute of the first selector that matches
 * an element with a non-empty value.
 *
 * @param {Document} document - DOM document to query.
 * @param {string[]} selectors - CSS selectors, tried in order.
 * @returns {string | undefined} the first non-empty value, or `undefined`.
 */
function firstMetaContent(document, selectors) {
  let found;
  // `some` stops at the first selector that yields a value.
  selectors.some((selector) => {
    found = document.querySelector(selector)?.getAttribute("content")?.trim() || undefined;
    return Boolean(found);
  });
  return found;
}
|
|
3496
|
+
/**
 * Collect the trimmed, non-empty `content` attributes of every element
 * matched by any of the selectors, de-duplicated via `uniqueStrings`.
 *
 * @param {Document} document - DOM document to query.
 * @param {string[]} selectors - CSS selectors, processed in order.
 * @returns {string[]} distinct values in encounter order.
 */
function metaContents(document, selectors) {
  const collected = [];
  for (const selector of selectors) {
    for (const node of document.querySelectorAll(selector)) {
      const content = node.getAttribute("content")?.trim();
      if (content) {
        collected.push(content);
      }
    }
  }
  return uniqueStrings(collected);
}
|
|
3503
|
+
/**
 * Split a keyword string on `;` or `,` into trimmed, non-empty, distinct
 * entries. A nullish value yields an empty list.
 *
 * @param {string | null | undefined} value - raw keyword string.
 * @returns {string[]} distinct keywords in their original order.
 */
function splitKeywords(value) {
  const keywords = [];
  for (const piece of (value ?? "").split(/[;,]/g)) {
    const keyword = piece.trim();
    if (keyword) {
      keywords.push(keyword);
    }
  }
  return uniqueStrings(keywords);
}
|
|
3508
|
+
/**
 * Capture a web article as markdown with a metadata frontmatter block.
 *
 * Steps:
 *   1. Fetch `input` and require an HTML content type.
 *   2. Read the canonical URL, title, authors, dates, tags and DOI from the
 *      page's `<link>`/`<meta>` elements.
 *   3. Run the canonical URL through `prepareUrlInput` to obtain the body
 *      text and any attachments.
 *   4. Assemble frontmatter, body and a trailing "## Source" section.
 *
 * @param {string} rootDir - workspace root, forwarded to `prepareUrlInput`.
 * @param {string} input - URL to fetch.
 * @param {object} options - ingest options; `author` and `contributor` are
 *   copied into the frontmatter.
 * @param {{ sourceType: string, sourceUrl?: string, doi?: string }} [extra]
 *   source type to record, the URL the user originally supplied, and a DOI
 *   that overrides the one found in the page metadata.
 * @returns {Promise<{ title: string, normalizedUrl: string, markdown: string, attachments: unknown }>}
 * @throws {Error} when the response is not HTML or the prepared source is
 *   neither markdown nor text.
 */
async function captureArticleMarkdown(rootDir, input, options, extra = { sourceType: "article" }) {
  const resolved = await fetchResolvedText(input);
  if (!resolved.contentType.includes("html")) {
    throw new Error(`Unsupported article content type: ${resolved.contentType}`);
  }
  const dom = new JSDOM(resolved.text, { url: resolved.finalUrl });
  const document = dom.window.document;
  const canonicalHref = document.querySelector('link[rel="canonical"]')?.getAttribute("href")?.trim();
  // A malformed or non-HTTP canonical link must not abort the capture:
  // fall back to the URL the fetch actually resolved to.
  let canonicalUrl = resolved.finalUrl;
  if (canonicalHref) {
    try {
      const candidate = new URL(canonicalHref, resolved.finalUrl);
      if (candidate.protocol === "http:" || candidate.protocol === "https:") {
        canonicalUrl = normalizeOriginUrl(candidate.toString());
      }
    } catch {
      // Unparseable canonical href; keep the resolved URL.
    }
  }
  const title = firstMetaContent(document, ['meta[name="citation_title"]', 'meta[property="og:title"]', 'meta[name="twitter:title"]']) ?? (document.title.trim() || canonicalUrl);
  const authors = uniqueStrings([
    ...metaContents(document, ['meta[name="citation_author"]']),
    ...metaContents(document, ['meta[name="author"]', 'meta[property="article:author"]'])
  ]);
  const publishedAt = firstMetaContent(document, [
    'meta[name="citation_publication_date"]',
    'meta[name="citation_online_date"]',
    'meta[property="article:published_time"]',
    'meta[name="pubdate"]'
  ]);
  const updatedAt = firstMetaContent(document, ['meta[property="article:modified_time"]', 'meta[name="lastmod"]']);
  const tags = uniqueStrings([
    ...metaContents(document, ['meta[property="article:tag"]']),
    ...splitKeywords(firstMetaContent(document, ['meta[name="keywords"]']))
  ]);
  // An explicit DOI from the caller wins over page metadata; a leading
  // "doi:" label in the metadata value is stripped.
  const inferredDoi = extra.doi ?? firstMetaContent(document, ['meta[name="citation_doi"]', 'meta[name="dc.identifier"]'])?.replace(/^doi:\s*/i, "");
  const normalizedOptions = normalizeIngestOptions(options);
  const prepared = await prepareUrlInput(rootDir, canonicalUrl, normalizedOptions);
  if (prepared.sourceKind !== "markdown" && prepared.sourceKind !== "text") {
    throw new Error(`Unsupported prepared article kind: ${prepared.sourceKind}`);
  }
  const body = prepared.extractedText ?? prepared.payloadBytes.toString("utf8");
  const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
  const markdown = [
    ...markdownFrontmatter({
      source_type: extra.sourceType,
      source_url: extra.sourceUrl ?? input,
      canonical_url: canonicalUrl,
      title,
      authors,
      published_at: publishedAt,
      updated_at: updatedAt,
      doi: inferredDoi,
      tags,
      author: options.author,
      contributor: options.contributor,
      captured_at: capturedAt
    }),
    body.trim(),
    "",
    "## Source",
    "",
    `- URL: ${canonicalUrl}`,
    ...extra.sourceType === "doi" && inferredDoi ? [`- DOI: ${inferredDoi}`] : [],
    ""
  ].join("\n");
  return {
    title,
    normalizedUrl: canonicalUrl,
    markdown,
    attachments: prepared.attachments
  };
}
|
|
3571
|
+
/**
 * Capture the article behind a DOI by delegating to `captureArticleMarkdown`
 * with the `https://doi.org/` URL built from the URI-encoded DOI.
 *
 * @param {string} rootDir - workspace root.
 * @param {string} input - bare DOI or DOI URL as supplied by the user.
 * @param {object} options - ingest options, forwarded unchanged.
 * @returns {Promise<object>} the result of `captureArticleMarkdown`.
 * @throws {Error} when no DOI can be derived from `input`.
 */
async function captureDoiMarkdown(rootDir, input, options) {
  const doi = doiFromInput(input);
  if (doi) {
    const resolverUrl = `https://doi.org/${encodeURIComponent(doi)}`;
    const extra = { sourceType: "doi", sourceUrl: input, doi };
    return captureArticleMarkdown(rootDir, resolverUrl, options, extra);
  }
  throw new Error(`Could not determine a DOI from ${input}`);
}
|
|
3449
3582
|
function manifestMatchesOrigin(manifest, prepared) {
|
|
3450
3583
|
if (prepared.originType === "url") {
|
|
3451
3584
|
return Boolean(prepared.url && manifest.url && normalizeOriginUrl(manifest.url) === normalizeOriginUrl(prepared.url));
|
|
@@ -3789,7 +3922,7 @@ async function persistPreparedInput(rootDir, prepared, paths) {
|
|
|
3789
3922
|
const extractionHash = prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact);
|
|
3790
3923
|
const existingByOrigin = await readManifestByOrigin(paths.manifestsDir, prepared);
|
|
3791
3924
|
const existingByHash = existingByOrigin ? null : await readManifestByHash(paths.manifestsDir, contentHash);
|
|
3792
|
-
if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
|
|
3925
|
+
if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.sourceType === prepared.sourceType && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
|
|
3793
3926
|
return { manifest: existingByOrigin, isNew: false, wasUpdated: false };
|
|
3794
3927
|
}
|
|
3795
3928
|
if (existingByHash) {
|
|
@@ -3835,6 +3968,7 @@ async function persistPreparedInput(rootDir, prepared, paths) {
|
|
|
3835
3968
|
title: prepared.title,
|
|
3836
3969
|
originType: prepared.originType,
|
|
3837
3970
|
sourceKind: prepared.sourceKind,
|
|
3971
|
+
sourceType: prepared.sourceType,
|
|
3838
3972
|
language: prepared.language,
|
|
3839
3973
|
originalPath: prepared.originalPath,
|
|
3840
3974
|
repoRelativePath: prepared.repoRelativePath,
|
|
@@ -3892,7 +4026,7 @@ function repoSyncWorkspaceIgnorePaths(rootDir, paths, repoRoot) {
|
|
|
3892
4026
|
return candidates.map((candidate) => path9.resolve(candidate)).filter((candidate, index, items) => items.indexOf(candidate) === index).filter((candidate) => withinRoot(repoRoot, candidate));
|
|
3893
4027
|
}
|
|
3894
4028
|
function preparedMatchesManifest(manifest, prepared, contentHash) {
|
|
3895
|
-
return manifest.contentHash === contentHash && manifest.extractionHash === (prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact)) && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
|
|
4029
|
+
return manifest.contentHash === contentHash && manifest.extractionHash === (prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact)) && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.sourceType === prepared.sourceType && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
|
|
3896
4030
|
}
|
|
3897
4031
|
function shouldDeferWatchSemanticRefresh(sourceKind) {
|
|
3898
4032
|
return sourceKind === "markdown" || sourceKind === "text" || sourceKind === "html" || sourceKind === "pdf" || sourceKind === "image";
|
|
@@ -4184,7 +4318,8 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4184
4318
|
if (!response.ok) {
|
|
4185
4319
|
throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
|
|
4186
4320
|
}
|
|
4187
|
-
const
|
|
4321
|
+
const finalUrl = normalizeOriginUrl(response.url || input);
|
|
4322
|
+
const inputUrl = new URL(finalUrl);
|
|
4188
4323
|
const originalPayloadBytes = Buffer.from(await response.arrayBuffer());
|
|
4189
4324
|
let payloadBytes = originalPayloadBytes;
|
|
4190
4325
|
let mimeType = resolveUrlMimeType(input, response);
|
|
@@ -4199,13 +4334,13 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4199
4334
|
const logDetails = [];
|
|
4200
4335
|
if (sourceKind === "html" || mimeType.startsWith("text/html")) {
|
|
4201
4336
|
const html = originalPayloadBytes.toString("utf8");
|
|
4202
|
-
const initialConversion = await convertHtmlToMarkdown(html,
|
|
4337
|
+
const initialConversion = await convertHtmlToMarkdown(html, finalUrl);
|
|
4203
4338
|
title = initialConversion.title;
|
|
4204
4339
|
let localizedHtml = html;
|
|
4205
4340
|
let localAssetReplacements;
|
|
4206
4341
|
if (options.includeAssets) {
|
|
4207
4342
|
const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
|
|
4208
|
-
extractHtmlImageReferences(html,
|
|
4343
|
+
extractHtmlImageReferences(html, finalUrl),
|
|
4209
4344
|
options
|
|
4210
4345
|
);
|
|
4211
4346
|
if (remoteAttachments.length) {
|
|
@@ -4215,19 +4350,19 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4215
4350
|
localAssetReplacements = new Map(
|
|
4216
4351
|
remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
|
|
4217
4352
|
);
|
|
4218
|
-
localizedHtml = rewriteHtmlImageReferences(html,
|
|
4353
|
+
localizedHtml = rewriteHtmlImageReferences(html, finalUrl, localAssetReplacements);
|
|
4219
4354
|
logDetails.push(`remote_assets=${remoteAttachments.length}`);
|
|
4220
4355
|
}
|
|
4221
4356
|
if (skippedCount) {
|
|
4222
4357
|
logDetails.push(`remote_asset_skips=${skippedCount}`);
|
|
4223
4358
|
}
|
|
4224
4359
|
}
|
|
4225
|
-
const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml,
|
|
4360
|
+
const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml, finalUrl);
|
|
4226
4361
|
extractedText = converted.markdown;
|
|
4227
4362
|
extractionArtifact = createHtmlReadabilityExtractionArtifact("markdown", "text/markdown");
|
|
4228
4363
|
if (localAssetReplacements?.size) {
|
|
4229
4364
|
const absoluteLocalAssetReplacements = new Map(
|
|
4230
|
-
[...localAssetReplacements.values()].map((replacement) => [new URL(replacement,
|
|
4365
|
+
[...localAssetReplacements.values()].map((replacement) => [new URL(replacement, finalUrl).toString(), replacement])
|
|
4231
4366
|
);
|
|
4232
4367
|
extractedText = rewriteMarkdownImageTargets(extractedText, absoluteLocalAssetReplacements);
|
|
4233
4368
|
}
|
|
@@ -4244,7 +4379,7 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4244
4379
|
extractionArtifact = createPlainTextExtractionArtifact(sourceKind, mimeType);
|
|
4245
4380
|
if (sourceKind === "markdown" && options.includeAssets) {
|
|
4246
4381
|
const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
|
|
4247
|
-
extractMarkdownImageReferences(extractedText,
|
|
4382
|
+
extractMarkdownImageReferences(extractedText, finalUrl),
|
|
4248
4383
|
options
|
|
4249
4384
|
);
|
|
4250
4385
|
if (remoteAttachments.length) {
|
|
@@ -4254,7 +4389,7 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4254
4389
|
const replacements = new Map(
|
|
4255
4390
|
remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
|
|
4256
4391
|
);
|
|
4257
|
-
extractedText = rewriteMarkdownImageReferences(extractedText,
|
|
4392
|
+
extractedText = rewriteMarkdownImageReferences(extractedText, finalUrl, replacements);
|
|
4258
4393
|
payloadBytes = Buffer.from(extractedText, "utf8");
|
|
4259
4394
|
logDetails.push(`remote_assets=${remoteAttachments.length}`);
|
|
4260
4395
|
}
|
|
@@ -4282,7 +4417,7 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4282
4417
|
originType: "url",
|
|
4283
4418
|
sourceKind,
|
|
4284
4419
|
language,
|
|
4285
|
-
url:
|
|
4420
|
+
url: finalUrl,
|
|
4286
4421
|
mimeType,
|
|
4287
4422
|
storedExtension,
|
|
4288
4423
|
payloadBytes,
|
|
@@ -4395,8 +4530,8 @@ async function ingestInput(rootDir, input, options) {
|
|
|
4395
4530
|
}
|
|
4396
4531
|
async function addInput(rootDir, input, options = {}) {
|
|
4397
4532
|
const { paths } = await initWorkspace(rootDir);
|
|
4398
|
-
if (!isHttpUrl(input) && !arxivIdFromInput(input)) {
|
|
4399
|
-
throw new Error("`swarmvault add` only supports URLs
|
|
4533
|
+
if (!isHttpUrl(input) && !arxivIdFromInput(input) && !doiFromInput(input)) {
|
|
4534
|
+
throw new Error("`swarmvault add` only supports URLs, bare arXiv ids, and bare DOI strings in the current release.");
|
|
4400
4535
|
}
|
|
4401
4536
|
let prepared = null;
|
|
4402
4537
|
let captureType = "url";
|
|
@@ -4409,26 +4544,55 @@ async function addInput(rootDir, input, options = {}) {
|
|
|
4409
4544
|
title: captured.title,
|
|
4410
4545
|
url: captured.normalizedUrl,
|
|
4411
4546
|
markdown: captured.markdown,
|
|
4547
|
+
sourceType: "arxiv",
|
|
4412
4548
|
logDetails: ["capture_type=arxiv"]
|
|
4413
4549
|
});
|
|
4414
4550
|
captureType = "arxiv";
|
|
4415
4551
|
normalizedUrl = captured.normalizedUrl;
|
|
4552
|
+
} else if (doiFromInput(input)) {
|
|
4553
|
+
const captured = await captureDoiMarkdown(rootDir, input, options);
|
|
4554
|
+
prepared = prepareCapturedMarkdownInput({
|
|
4555
|
+
title: captured.title,
|
|
4556
|
+
url: captured.normalizedUrl,
|
|
4557
|
+
markdown: captured.markdown,
|
|
4558
|
+
sourceType: "doi",
|
|
4559
|
+
attachments: captured.attachments,
|
|
4560
|
+
logDetails: ["capture_type=doi"]
|
|
4561
|
+
});
|
|
4562
|
+
captureType = "doi";
|
|
4563
|
+
normalizedUrl = captured.normalizedUrl;
|
|
4416
4564
|
} else if (isTweetUrl(input)) {
|
|
4417
4565
|
const captured = await captureTweetMarkdown(input, options);
|
|
4418
4566
|
prepared = prepareCapturedMarkdownInput({
|
|
4419
4567
|
title: captured.title,
|
|
4420
4568
|
url: captured.normalizedUrl,
|
|
4421
4569
|
markdown: captured.markdown,
|
|
4570
|
+
sourceType: "tweet",
|
|
4422
4571
|
logDetails: ["capture_type=tweet"]
|
|
4423
4572
|
});
|
|
4424
4573
|
captureType = "tweet";
|
|
4425
4574
|
normalizedUrl = captured.normalizedUrl;
|
|
4575
|
+
} else if (isHttpUrl(input)) {
|
|
4576
|
+
const captured = await captureArticleMarkdown(rootDir, input, options, {
|
|
4577
|
+
sourceType: "article",
|
|
4578
|
+
sourceUrl: input
|
|
4579
|
+
});
|
|
4580
|
+
prepared = prepareCapturedMarkdownInput({
|
|
4581
|
+
title: captured.title,
|
|
4582
|
+
url: captured.normalizedUrl,
|
|
4583
|
+
markdown: captured.markdown,
|
|
4584
|
+
sourceType: "article",
|
|
4585
|
+
attachments: captured.attachments,
|
|
4586
|
+
logDetails: ["capture_type=article"]
|
|
4587
|
+
});
|
|
4588
|
+
captureType = "article";
|
|
4589
|
+
normalizedUrl = captured.normalizedUrl;
|
|
4426
4590
|
}
|
|
4427
4591
|
} catch {
|
|
4428
4592
|
fallback = true;
|
|
4429
4593
|
}
|
|
4430
4594
|
if (!prepared) {
|
|
4431
|
-
normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : normalizeOriginUrl(input);
|
|
4595
|
+
normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : doiFromInput(input) ? `https://doi.org/${encodeURIComponent(doiFromInput(input) ?? "")}` : normalizeOriginUrl(input);
|
|
4432
4596
|
return {
|
|
4433
4597
|
captureType: "url",
|
|
4434
4598
|
manifest: await ingestInput(rootDir, normalizedUrl, options),
|
|
@@ -4684,7 +4848,7 @@ function buildSchemaPrompt(schema, instruction) {
|
|
|
4684
4848
|
// src/vault.ts
|
|
4685
4849
|
import fs15 from "fs/promises";
|
|
4686
4850
|
import path18 from "path";
|
|
4687
|
-
import
|
|
4851
|
+
import matter9 from "gray-matter";
|
|
4688
4852
|
import { z as z7 } from "zod";
|
|
4689
4853
|
|
|
4690
4854
|
// src/analysis.ts
|
|
@@ -4991,6 +5155,7 @@ var DEFAULT_BENCHMARK_QUESTIONS = [
|
|
|
4991
5155
|
"Where are the biggest knowledge gaps?",
|
|
4992
5156
|
"What evidence should I read first?"
|
|
4993
5157
|
];
|
|
5158
|
+
var RESEARCH_BENCHMARK_QUESTION = "Which research sources should I read first, and why?";
|
|
4994
5159
|
function nodeMap(graph) {
|
|
4995
5160
|
return new Map(graph.nodes.map((node) => [node.id, node]));
|
|
4996
5161
|
}
|
|
@@ -5040,9 +5205,68 @@ function benchmarkQueryTokens(graph, queryResult, pageContentsById) {
|
|
|
5040
5205
|
queryTokens,
|
|
5041
5206
|
reduction: 0,
|
|
5042
5207
|
visitedNodeIds: queryResult.visitedNodeIds,
|
|
5208
|
+
visitedEdgeIds: queryResult.visitedEdgeIds,
|
|
5043
5209
|
pageIds: queryResult.pageIds
|
|
5044
5210
|
};
|
|
5045
5211
|
}
|
|
5212
|
+
/**
 * Compute a content hash of a graph from a normalized JSON projection.
 *
 * Only the listed node, edge, page and community fields take part. Pages of
 * kind `graph_report` and `community_summary` are excluded. Every collection
 * is sorted by `id` and every id list is sorted, so input ordering does not
 * affect the result.
 *
 * @param {object} graph - graph with `nodes`, `edges`, `pages` and optional `communities`.
 * @returns {string} `sha256` of the normalized JSON string.
 */
function graphHash(graph) {
  const sortedCopy = (items) => [...items].sort();
  // NOTE(review): localeCompare ordering may depend on the runtime's locale
  // data — confirm hashes are stable across environments.
  const byId = (left, right) => left.id.localeCompare(right.id);

  const nodes = Array.from(graph.nodes, (node) => ({
    id: node.id,
    type: node.type,
    label: node.label,
    pageId: node.pageId ?? null,
    communityId: node.communityId ?? null,
    degree: node.degree ?? null,
    bridgeScore: node.bridgeScore ?? null,
    isGodNode: node.isGodNode ?? false,
    sourceIds: sortedCopy(node.sourceIds),
    projectIds: sortedCopy(node.projectIds)
  })).sort(byId);

  const edges = Array.from(graph.edges, (edge) => ({
    id: edge.id,
    source: edge.source,
    target: edge.target,
    relation: edge.relation,
    status: edge.status,
    evidenceClass: edge.evidenceClass,
    confidence: edge.confidence,
    provenance: sortedCopy(edge.provenance)
  })).sort(byId);

  const pages = graph.pages
    .filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary")
    .map((page) => ({
      id: page.id,
      path: page.path,
      kind: page.kind,
      status: page.status,
      sourceType: page.sourceType ?? null,
      sourceIds: sortedCopy(page.sourceIds),
      projectIds: sortedCopy(page.projectIds),
      nodeIds: sortedCopy(page.nodeIds)
    }))
    .sort(byId);

  const communities = Array.from(graph.communities ?? [], (community) => ({
    id: community.id,
    label: community.label,
    nodeIds: sortedCopy(community.nodeIds)
  })).sort(byId);

  return sha256(JSON.stringify({ nodes, edges, pages, communities }));
}
|
|
5259
|
+
/**
 * Report whether any source page carries a source type other than `"url"`.
 *
 * @param {Array<{ kind: string, sourceType?: string }>} pages - graph pages.
 * @returns {boolean} `true` when at least one such page exists.
 */
function hasResearchSources(pages) {
  for (const page of pages) {
    if (page.kind !== "source") {
      continue;
    }
    if (page.sourceType && page.sourceType !== "url") {
      return true;
    }
  }
  return false;
}
|
|
5262
|
+
/**
 * Pick the default benchmark questions for a graph.
 *
 * When the graph has research sources, the research question is placed
 * first. The result is de-duplicated and truncated to a limit clamped
 * between 1 and the number of default questions.
 *
 * @param {{ pages: object[] }} graph - graph whose pages are inspected.
 * @param {number} [maxQuestions=3] - requested maximum number of questions.
 * @returns {string[]} the selected questions.
 */
function defaultBenchmarkQuestionsForGraph(graph, maxQuestions = 3) {
  const cappedLimit = Math.min(maxQuestions, DEFAULT_BENCHMARK_QUESTIONS.length);
  const normalizedLimit = Math.max(1, cappedLimit);
  const questions = hasResearchSources(graph.pages)
    ? [RESEARCH_BENCHMARK_QUESTION, ...DEFAULT_BENCHMARK_QUESTIONS]
    : [...DEFAULT_BENCHMARK_QUESTIONS];
  return uniqueBy(questions, (item) => item).slice(0, normalizedLimit);
}
|
|
5046
5270
|
function buildBenchmarkArtifact(input) {
|
|
5047
5271
|
const corpusTokens = Math.max(1, Math.round(input.corpusWords * (100 / 75)));
|
|
5048
5272
|
const perQuestion = input.perQuestion.filter((entry) => entry.queryTokens > 0).map((entry) => ({
|
|
@@ -5051,8 +5275,18 @@ function buildBenchmarkArtifact(input) {
|
|
|
5051
5275
|
}));
|
|
5052
5276
|
const avgQueryTokens = perQuestion.length ? Math.max(1, Math.round(perQuestion.reduce((total, entry) => total + entry.queryTokens, 0) / perQuestion.length)) : 0;
|
|
5053
5277
|
const reductionRatio = avgQueryTokens ? Number(Math.max(0, 1 - avgQueryTokens / Math.max(1, corpusTokens)).toFixed(3)) : 0;
|
|
5278
|
+
const uniqueVisitedNodes = new Set(perQuestion.flatMap((entry) => entry.visitedNodeIds)).size;
|
|
5279
|
+
const summary = {
|
|
5280
|
+
questionCount: input.questions.length,
|
|
5281
|
+
uniqueVisitedNodes,
|
|
5282
|
+
finalContextTokens: avgQueryTokens,
|
|
5283
|
+
naiveCorpusTokens: corpusTokens,
|
|
5284
|
+
avgReduction: reductionRatio,
|
|
5285
|
+
reductionRatio
|
|
5286
|
+
};
|
|
5054
5287
|
return {
|
|
5055
5288
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5289
|
+
graphHash: graphHash(input.graph),
|
|
5056
5290
|
corpusWords: input.corpusWords,
|
|
5057
5291
|
corpusTokens,
|
|
5058
5292
|
nodes: input.graph.nodes.length,
|
|
@@ -5060,7 +5294,9 @@ function buildBenchmarkArtifact(input) {
|
|
|
5060
5294
|
avgQueryTokens,
|
|
5061
5295
|
reductionRatio,
|
|
5062
5296
|
sampleQuestions: input.questions,
|
|
5063
|
-
perQuestion
|
|
5297
|
+
perQuestion,
|
|
5298
|
+
questionResults: perQuestion,
|
|
5299
|
+
summary
|
|
5064
5300
|
};
|
|
5065
5301
|
}
|
|
5066
5302
|
|
|
@@ -5083,7 +5319,7 @@ function conflictConfidence(claimA, claimB) {
|
|
|
5083
5319
|
// src/deep-lint.ts
|
|
5084
5320
|
import fs11 from "fs/promises";
|
|
5085
5321
|
import path14 from "path";
|
|
5086
|
-
import
|
|
5322
|
+
import matter4 from "gray-matter";
|
|
5087
5323
|
import { z as z5 } from "zod";
|
|
5088
5324
|
|
|
5089
5325
|
// src/findings.ts
|
|
@@ -5450,7 +5686,7 @@ async function loadContextPages(rootDir, graph) {
|
|
|
5450
5686
|
contextPages.slice(0, 18).map(async (page) => {
|
|
5451
5687
|
const absolutePath = path14.join(paths.wikiDir, page.path);
|
|
5452
5688
|
const raw = await fs11.readFile(absolutePath, "utf8").catch(() => "");
|
|
5453
|
-
const parsed =
|
|
5689
|
+
const parsed = matter4(raw);
|
|
5454
5690
|
return {
|
|
5455
5691
|
id: page.id,
|
|
5456
5692
|
title: page.title,
|
|
@@ -5978,12 +6214,15 @@ function topGodNodes(graph, limit = 10) {
|
|
|
5978
6214
|
}
|
|
5979
6215
|
|
|
5980
6216
|
// src/markdown.ts
|
|
5981
|
-
import
|
|
5982
|
-
function
|
|
6217
|
+
import matter5 from "gray-matter";
|
|
6218
|
+
/**
 * Drop falsy entries, then de-duplicate the remainder with `uniqueBy`,
 * keyed on the value itself.
 *
 * @param {Array<string | null | undefined>} values - candidate values.
 * @returns {string[]} the result of `uniqueBy` over the truthy values.
 */
function uniqueStrings2(values) {
  const truthyValues = values.filter((value) => Boolean(value));
  const identity = (value) => value;
  return uniqueBy(truthyValues, identity);
}
|
|
6221
|
+
/**
 * Round-trip a frontmatter object through JSON to obtain a plain-data copy.
 *
 * As with any JSON round-trip, properties whose value is `undefined` are
 * dropped and `undefined` array entries become `null`.
 *
 * @param {object} value - frontmatter data.
 * @returns {object} the JSON-normalized copy.
 */
function safeFrontmatter(value) {
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}
|
|
5985
6224
|
function decoratedTags(baseTags, decorations) {
|
|
5986
|
-
return
|
|
6225
|
+
return uniqueStrings2([
|
|
5987
6226
|
...baseTags,
|
|
5988
6227
|
...(decorations?.projectIds ?? []).map((projectId) => `project/${projectId}`),
|
|
5989
6228
|
...decorations?.extraTags ?? []
|
|
@@ -6062,6 +6301,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
6062
6301
|
page_id: pageId,
|
|
6063
6302
|
kind: "source",
|
|
6064
6303
|
title: analysis.title,
|
|
6304
|
+
...manifest.sourceType ? { source_type: manifest.sourceType } : {},
|
|
6065
6305
|
tags: decoratedTags(analysis.code ? ["source", "code"] : ["source"], decorations),
|
|
6066
6306
|
source_ids: [manifest.sourceId],
|
|
6067
6307
|
project_ids: decorations?.projectIds ?? [],
|
|
@@ -6084,6 +6324,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
6084
6324
|
"",
|
|
6085
6325
|
`Source ID: \`${manifest.sourceId}\``,
|
|
6086
6326
|
manifest.url ? `Source URL: ${manifest.url}` : `Source Path: \`${manifest.originalPath ?? manifest.storedPath}\``,
|
|
6327
|
+
...manifest.sourceType ? [`Source Type: \`${manifest.sourceType}\``, ""] : [""],
|
|
6087
6328
|
"",
|
|
6088
6329
|
"## Summary",
|
|
6089
6330
|
"",
|
|
@@ -6128,6 +6369,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
6128
6369
|
path: relativePath,
|
|
6129
6370
|
title: analysis.title,
|
|
6130
6371
|
kind: "source",
|
|
6372
|
+
sourceType: manifest.sourceType,
|
|
6131
6373
|
sourceIds: [manifest.sourceId],
|
|
6132
6374
|
projectIds: decorations?.projectIds ?? [],
|
|
6133
6375
|
nodeIds,
|
|
@@ -6145,7 +6387,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
6145
6387
|
compiledFrom: metadata.compiledFrom,
|
|
6146
6388
|
managedBy: metadata.managedBy
|
|
6147
6389
|
},
|
|
6148
|
-
content:
|
|
6390
|
+
content: matter5.stringify(body, safeFrontmatter(frontmatter))
|
|
6149
6391
|
};
|
|
6150
6392
|
}
|
|
6151
6393
|
function buildModulePage(input) {
|
|
@@ -6160,7 +6402,7 @@ function buildModulePage(input) {
|
|
|
6160
6402
|
const nodeIds = [code.moduleId, ...code.symbols.map((symbol) => symbol.id)];
|
|
6161
6403
|
const localModuleBacklinks = input.localModules.map((moduleRef) => moduleRef.page.id);
|
|
6162
6404
|
const relatedOutputs = input.relatedOutputs ?? [];
|
|
6163
|
-
const backlinks =
|
|
6405
|
+
const backlinks = uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]);
|
|
6164
6406
|
const importsSection = code.imports.length ? code.imports.map((item) => {
|
|
6165
6407
|
const localModule = item.resolvedSourceId ? input.localModules.find((moduleRef) => moduleRef.sourceId === item.resolvedSourceId && moduleRef.reExport === item.reExport) : void 0;
|
|
6166
6408
|
const importedBits = [
|
|
@@ -6206,9 +6448,9 @@ function buildModulePage(input) {
|
|
|
6206
6448
|
source_hashes: {
|
|
6207
6449
|
[manifest.sourceId]: manifest.contentHash
|
|
6208
6450
|
},
|
|
6209
|
-
related_page_ids:
|
|
6451
|
+
related_page_ids: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
|
|
6210
6452
|
related_node_ids: [],
|
|
6211
|
-
related_source_ids:
|
|
6453
|
+
related_source_ids: uniqueStrings2([
|
|
6212
6454
|
manifest.sourceId,
|
|
6213
6455
|
...input.localModules.map((moduleRef) => moduleRef.sourceId),
|
|
6214
6456
|
...relatedOutputs.flatMap((page) => page.sourceIds)
|
|
@@ -6280,9 +6522,9 @@ function buildModulePage(input) {
|
|
|
6280
6522
|
backlinks,
|
|
6281
6523
|
schemaHash,
|
|
6282
6524
|
sourceHashes: { [manifest.sourceId]: manifest.contentHash },
|
|
6283
|
-
relatedPageIds:
|
|
6525
|
+
relatedPageIds: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
|
|
6284
6526
|
relatedNodeIds: [],
|
|
6285
|
-
relatedSourceIds:
|
|
6527
|
+
relatedSourceIds: uniqueStrings2([
|
|
6286
6528
|
manifest.sourceId,
|
|
6287
6529
|
...input.localModules.map((moduleRef) => moduleRef.sourceId),
|
|
6288
6530
|
...relatedOutputs.flatMap((page) => page.sourceIds)
|
|
@@ -6292,7 +6534,7 @@ function buildModulePage(input) {
|
|
|
6292
6534
|
compiledFrom: metadata.compiledFrom,
|
|
6293
6535
|
managedBy: metadata.managedBy
|
|
6294
6536
|
},
|
|
6295
|
-
content:
|
|
6537
|
+
content: matter5.stringify(body, frontmatter)
|
|
6296
6538
|
};
|
|
6297
6539
|
}
|
|
6298
6540
|
function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes, schemaHash, metadata, relativePath, relatedOutputs = [], decorations) {
|
|
@@ -6363,7 +6605,7 @@ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHash
|
|
|
6363
6605
|
compiledFrom: metadata.compiledFrom,
|
|
6364
6606
|
managedBy: metadata.managedBy
|
|
6365
6607
|
},
|
|
6366
|
-
content:
|
|
6608
|
+
content: matter5.stringify(body, frontmatter)
|
|
6367
6609
|
};
|
|
6368
6610
|
}
|
|
6369
6611
|
function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
|
|
@@ -6439,7 +6681,7 @@ function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
|
|
|
6439
6681
|
}
|
|
6440
6682
|
function buildSectionIndex(kind, pages, schemaHash, metadata, projectIds = []) {
|
|
6441
6683
|
const title = kind.charAt(0).toUpperCase() + kind.slice(1);
|
|
6442
|
-
return
|
|
6684
|
+
return matter5.stringify(
|
|
6443
6685
|
[`# ${title}`, "", ...pages.map((page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`), ""].join("\n"),
|
|
6444
6686
|
{
|
|
6445
6687
|
page_id: `${kind}:index`,
|
|
@@ -6481,27 +6723,118 @@ function crossCommunityEdges(graph) {
|
|
|
6481
6723
|
/**
 * Propose up to six follow-up questions for a graph.
 *
 * One question is produced per community with at most two nodes, followed by
 * one question for each of the three nodes with the highest positive
 * `bridgeScore`. The combined list is passed through `uniqueStrings2` and
 * truncated to six entries.
 *
 * @param {object} graph - Graph with `nodes` and an optional `communities` list.
 * @returns {string[]} At most six question strings.
 */
function suggestedGraphQuestions(graph) {
  const communityQuestions = [];
  for (const community of graph.communities ?? []) {
    if (community.nodeIds.length <= 2) {
      communityQuestions.push(`What sources would strengthen community ${community.label}?`);
    }
  }

  // filter() returns a fresh array, so sorting it in place leaves graph.nodes untouched.
  const rankedBridges = graph.nodes.filter((node) => (node.bridgeScore ?? 0) > 0);
  rankedBridges.sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0));
  const bridgeQuestions = rankedBridges
    .slice(0, 3)
    .map((node) => `Why does ${node.label} connect multiple communities in the vault?`);

  const questions = uniqueStrings2([...communityQuestions, ...bridgeQuestions]);
  return questions.slice(0, 6);
}
|
|
6731
|
+
/**
 * Build the structured (data-only) form of the graph report.
 *
 * Reads `input.graph` (nodes, edges, pages, optional communities),
 * `input.communityPages`, the optional `input.benchmark` / `input.benchmarkStale`,
 * the optional `input.recentResearchSources` and `input.graphHash`, and returns
 * a plain object with:
 *   - overview counts,
 *   - an optional benchmark summary,
 *   - up to 8 god nodes (highest `degree` first) and up to 8 bridge nodes
 *     (highest positive `bridgeScore` first),
 *   - communities with at most two nodes ("thin"), linked to their
 *     `graph:<communityId>` page when one exists,
 *   - up to 8 cross-community edges with their path and a textual explanation,
 *   - suggested questions, community page references and recent research sources.
 *
 * @param {object} input - Report inputs as described above.
 * @returns {object} The report artifact; `generatedAt` is the current time in ISO form.
 */
function buildGraphReportArtifact(input) {
  const { graph } = input;
  const communities = graph.communities ?? [];

  // Build an id -> item index in which the FIRST item with a given id wins,
  // matching the result of Array.prototype.find while avoiding a linear scan
  // for every lookup.
  const indexFirstById = (items) => {
    const index = new Map();
    for (const item of items) {
      if (!index.has(item.id)) {
        index.set(item.id, item);
      }
    }
    return index;
  };

  const nodesById = new Map(graph.nodes.map((node) => [node.id, node]));
  const communitiesById = indexFirstById(communities);
  const communityPagesById = indexFirstById(input.communityPages);

  // Shared projection used for both the god-node and bridge-node lists.
  const summarizeNode = (node) => ({
    nodeId: node.id,
    label: node.label,
    pageId: node.pageId,
    degree: node.degree,
    bridgeScore: node.bridgeScore
  });
  // Nodes without a (truthy) communityId have no community to report.
  const communityOf = (node) => (node?.communityId ? communitiesById.get(node.communityId) : void 0);

  const godNodes = graph.nodes
    .filter((node) => node.isGodNode)
    .sort((left, right) => (right.degree ?? 0) - (left.degree ?? 0))
    .slice(0, 8);
  const bridgeNodes = graph.nodes
    .filter((node) => (node.bridgeScore ?? 0) > 0)
    .sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0))
    .slice(0, 8);

  const thinCommunities = communities
    .filter((community) => community.nodeIds.length <= 2)
    .map((community) => {
      const page = communityPagesById.get(`graph:${community.id}`);
      return {
        id: community.id,
        label: community.label,
        nodeCount: community.nodeIds.length,
        pageId: page?.id,
        path: page?.path,
        title: page?.title
      };
    });

  const surprisingConnections = crossCommunityEdges(graph)
    .slice(0, 8)
    .map((edge) => {
      const source = nodesById.get(edge.source);
      const target = nodesById.get(edge.target);
      const path23 = shortestGraphPath(graph, edge.source, edge.target);
      const sourceCommunity = communityOf(source);
      const targetCommunity = communityOf(target);
      // Fall back to the raw node id when the node is missing from the graph.
      const sourceLabel = source?.label ?? edge.source;
      const targetLabel = target?.label ?? edge.target;
      return {
        id: edge.id,
        sourceNodeId: edge.source,
        sourceLabel,
        targetNodeId: edge.target,
        targetLabel,
        relation: edge.relation,
        evidenceClass: edge.evidenceClass,
        confidence: edge.confidence,
        pathNodeIds: path23.nodeIds,
        pathEdgeIds: path23.edgeIds,
        pathSummary: path23.summary,
        explanation: normalizeWhitespace(
          [
            `${sourceLabel} links ${sourceCommunity?.label ? `from ${sourceCommunity.label}` : ""}`.trim(),
            `to ${targetLabel}${targetCommunity?.label ? ` in ${targetCommunity.label}` : ""}`.trim(),
            `through ${edge.relation} with ${edge.evidenceClass} evidence at ${edge.confidence.toFixed(2)} confidence.`
          ].join(" ")
        )
      };
    });

  return {
    generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
    graphHash: input.graphHash,
    overview: {
      nodes: graph.nodes.length,
      edges: graph.edges.length,
      pages: graph.pages.length,
      communities: communities.length
    },
    benchmark: input.benchmark ? {
      generatedAt: input.benchmark.generatedAt,
      stale: input.benchmarkStale ?? false,
      summary: input.benchmark.summary,
      questionCount: input.benchmark.sampleQuestions.length
    } : void 0,
    godNodes: godNodes.map(summarizeNode),
    bridgeNodes: bridgeNodes.map(summarizeNode),
    thinCommunities,
    surprisingConnections,
    suggestedQuestions: suggestedGraphQuestions(graph),
    communityPages: input.communityPages.map((page) => ({
      id: page.id,
      path: page.path,
      title: page.title
    })),
    recentResearchSources: (input.recentResearchSources ?? []).map((page) => ({
      pageId: page.id,
      path: page.path,
      title: page.title,
      sourceType: page.sourceType,
      updatedAt: page.updatedAt
    }))
  };
}
|
|
6489
6819
|
function buildGraphReportPage(input) {
|
|
6490
6820
|
const pageId = "graph:report";
|
|
6491
6821
|
const pathValue = pagePathFor("graph_report", "report");
|
|
6492
6822
|
const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
|
|
6493
6823
|
const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
|
|
6494
|
-
const
|
|
6495
|
-
|
|
6496
|
-
|
|
6497
|
-
|
|
6498
|
-
const
|
|
6499
|
-
|
|
6500
|
-
...
|
|
6501
|
-
...
|
|
6502
|
-
...input.
|
|
6824
|
+
const relatedNodeIds = uniqueStrings2([
|
|
6825
|
+
...input.report.godNodes.map((node) => node.nodeId),
|
|
6826
|
+
...input.report.bridgeNodes.map((node) => node.nodeId)
|
|
6827
|
+
]);
|
|
6828
|
+
const relatedPageIds = uniqueStrings2([
|
|
6829
|
+
...input.report.godNodes.map((node) => node.pageId ?? ""),
|
|
6830
|
+
...input.report.bridgeNodes.map((node) => node.pageId ?? ""),
|
|
6831
|
+
...input.report.communityPages.map((page) => page.id),
|
|
6832
|
+
...input.report.recentResearchSources.map((page) => page.pageId)
|
|
6833
|
+
]);
|
|
6834
|
+
const relatedSourceIds = uniqueStrings2([
|
|
6835
|
+
...relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []),
|
|
6836
|
+
...input.report.recentResearchSources.flatMap((page) => pagesById.get(page.pageId)?.sourceIds ?? [])
|
|
6503
6837
|
]);
|
|
6504
|
-
const relatedSourceIds = uniqueStrings(relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []));
|
|
6505
6838
|
const frontmatter = {
|
|
6506
6839
|
page_id: pageId,
|
|
6507
6840
|
kind: "graph_report",
|
|
@@ -6529,47 +6862,66 @@ function buildGraphReportPage(input) {
|
|
|
6529
6862
|
"",
|
|
6530
6863
|
"## Overview",
|
|
6531
6864
|
"",
|
|
6532
|
-
`- Nodes: ${input.
|
|
6533
|
-
`- Edges: ${input.
|
|
6534
|
-
`- Pages: ${input.
|
|
6535
|
-
`- Communities: ${input.
|
|
6865
|
+
`- Nodes: ${input.report.overview.nodes}`,
|
|
6866
|
+
`- Edges: ${input.report.overview.edges}`,
|
|
6867
|
+
`- Pages: ${input.report.overview.pages}`,
|
|
6868
|
+
`- Communities: ${input.report.overview.communities}`,
|
|
6536
6869
|
"",
|
|
6537
|
-
|
|
6538
|
-
|
|
6539
|
-
|
|
6540
|
-
`-
|
|
6541
|
-
`-
|
|
6542
|
-
`-
|
|
6543
|
-
`-
|
|
6870
|
+
"## Benchmark Summary",
|
|
6871
|
+
"",
|
|
6872
|
+
...input.report.benchmark ? [
|
|
6873
|
+
`- Generated At: ${input.report.benchmark.generatedAt}`,
|
|
6874
|
+
`- Status: ${input.report.benchmark.stale ? "Stale (graph changed since benchmark ran)" : "Fresh"}`,
|
|
6875
|
+
`- Naive Corpus Tokens: ${input.report.benchmark.summary.naiveCorpusTokens}`,
|
|
6876
|
+
`- Final Context Tokens: ${input.report.benchmark.summary.finalContextTokens}`,
|
|
6877
|
+
`- Unique Nodes Considered: ${input.report.benchmark.summary.uniqueVisitedNodes}`,
|
|
6878
|
+
`- Reduction Ratio: ${(input.report.benchmark.summary.reductionRatio * 100).toFixed(1)}%`,
|
|
6879
|
+
`- Questions: ${input.report.benchmark.questionCount}`,
|
|
6544
6880
|
""
|
|
6545
|
-
] : [],
|
|
6546
|
-
"## God Nodes",
|
|
6881
|
+
] : ["- No benchmark results yet.", ""],
|
|
6882
|
+
"## Top God Nodes",
|
|
6547
6883
|
"",
|
|
6548
|
-
...godNodes.length ? godNodes.map((node) =>
|
|
6884
|
+
...input.report.godNodes.length ? input.report.godNodes.map((node) => {
|
|
6885
|
+
const graphNode = nodesById.get(node.nodeId);
|
|
6886
|
+
return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
|
|
6887
|
+
}) : ["- No high-connectivity nodes detected."],
|
|
6549
6888
|
"",
|
|
6550
|
-
"## Bridge Nodes",
|
|
6889
|
+
"## Top Bridge Nodes",
|
|
6551
6890
|
"",
|
|
6552
|
-
...bridgeNodes.length ? bridgeNodes.map((node) =>
|
|
6891
|
+
...input.report.bridgeNodes.length ? input.report.bridgeNodes.map((node) => {
|
|
6892
|
+
const graphNode = nodesById.get(node.nodeId);
|
|
6893
|
+
return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
|
|
6894
|
+
}) : ["- No cross-community bridge nodes detected."],
|
|
6553
6895
|
"",
|
|
6554
6896
|
"## Communities",
|
|
6555
6897
|
"",
|
|
6556
|
-
...input.communityPages.length ? input.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
|
|
6898
|
+
...input.report.communityPages.length ? input.report.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
|
|
6557
6899
|
"",
|
|
6558
|
-
"## Thin
|
|
6900
|
+
"## Thin Or Underlinked Areas",
|
|
6559
6901
|
"",
|
|
6560
|
-
...thinCommunities.length ? thinCommunities.map(
|
|
6902
|
+
...input.report.thinCommunities.length ? input.report.thinCommunities.map(
|
|
6903
|
+
(community) => community.path ? `- [[${community.path.replace(/\.md$/, "")}|${community.title ?? community.label}]] (${community.nodeCount} node(s))` : `- ${community.label} (${community.nodeCount} node(s))`
|
|
6904
|
+
) : ["- No thin communities detected."],
|
|
6561
6905
|
"",
|
|
6562
|
-
"##
|
|
6906
|
+
"## Surprising Connections",
|
|
6563
6907
|
"",
|
|
6564
|
-
...
|
|
6565
|
-
const source = nodesById.get(
|
|
6566
|
-
const target = nodesById.get(
|
|
6567
|
-
|
|
6908
|
+
...input.report.surprisingConnections.length ? input.report.surprisingConnections.map((connection) => {
|
|
6909
|
+
const source = nodesById.get(connection.sourceNodeId);
|
|
6910
|
+
const target = nodesById.get(connection.targetNodeId);
|
|
6911
|
+
const sourceLabel = source ? graphNodeLink(source, pagesById) : `\`${connection.sourceNodeId}\``;
|
|
6912
|
+
const targetLabel = target ? graphNodeLink(target, pagesById) : `\`${connection.targetNodeId}\``;
|
|
6913
|
+
return `- ${sourceLabel} ${connection.relation} ${targetLabel} (${connection.evidenceClass}, ${connection.confidence.toFixed(2)}). ${connection.explanation} Path: ${connection.pathSummary}.`;
|
|
6568
6914
|
}) : ["- No cross-community links detected."],
|
|
6569
6915
|
"",
|
|
6570
|
-
"##
|
|
6916
|
+
"## New Research Sources",
|
|
6917
|
+
"",
|
|
6918
|
+
...input.report.recentResearchSources.length ? input.report.recentResearchSources.map(
|
|
6919
|
+
(page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]] (\`${page.sourceType}\`, updated ${page.updatedAt})`
|
|
6920
|
+
) : ["- No newly captured research sources since the previous compile."],
|
|
6571
6921
|
"",
|
|
6572
|
-
|
|
6922
|
+
"## Suggested Questions",
|
|
6923
|
+
"",
|
|
6924
|
+
...input.report.suggestedQuestions.map((question) => `- ${question}`),
|
|
6573
6925
|
""
|
|
6574
6926
|
].join("\n");
|
|
6575
6927
|
return {
|
|
@@ -6595,7 +6947,7 @@ function buildGraphReportPage(input) {
|
|
|
6595
6947
|
compiledFrom: input.metadata.compiledFrom,
|
|
6596
6948
|
managedBy: input.metadata.managedBy
|
|
6597
6949
|
},
|
|
6598
|
-
content:
|
|
6950
|
+
content: matter5.stringify(body, frontmatter)
|
|
6599
6951
|
};
|
|
6600
6952
|
}
|
|
6601
6953
|
function buildCommunitySummaryPage(input) {
|
|
@@ -6604,14 +6956,14 @@ function buildCommunitySummaryPage(input) {
|
|
|
6604
6956
|
const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
|
|
6605
6957
|
const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
|
|
6606
6958
|
const communityNodes = input.community.nodeIds.map((nodeId) => nodesById.get(nodeId)).filter((node) => Boolean(node));
|
|
6607
|
-
const communityPageIds =
|
|
6959
|
+
const communityPageIds = uniqueStrings2(communityNodes.map((node) => node.pageId ?? ""));
|
|
6608
6960
|
const communityPages = communityPageIds.map((id) => pagesById.get(id)).filter((page) => Boolean(page));
|
|
6609
6961
|
const externalEdges = input.graph.edges.filter((edge) => {
|
|
6610
6962
|
const source = nodesById.get(edge.source);
|
|
6611
6963
|
const target = nodesById.get(edge.target);
|
|
6612
6964
|
return source?.communityId === input.community.id && target?.communityId && target.communityId !== input.community.id;
|
|
6613
6965
|
}).slice(0, 8);
|
|
6614
|
-
const relatedSourceIds =
|
|
6966
|
+
const relatedSourceIds = uniqueStrings2(communityNodes.flatMap((node) => node.sourceIds));
|
|
6615
6967
|
const frontmatter = {
|
|
6616
6968
|
page_id: pageId,
|
|
6617
6969
|
kind: "community_summary",
|
|
@@ -6630,7 +6982,7 @@ function buildCommunitySummaryPage(input) {
|
|
|
6630
6982
|
backlinks: ["graph:report"],
|
|
6631
6983
|
schema_hash: input.schemaHash,
|
|
6632
6984
|
source_hashes: {},
|
|
6633
|
-
related_page_ids:
|
|
6985
|
+
related_page_ids: uniqueStrings2(["graph:report", ...communityPageIds]),
|
|
6634
6986
|
related_node_ids: input.community.nodeIds,
|
|
6635
6987
|
related_source_ids: relatedSourceIds
|
|
6636
6988
|
};
|
|
@@ -6669,7 +7021,7 @@ function buildCommunitySummaryPage(input) {
|
|
|
6669
7021
|
backlinks: ["graph:report"],
|
|
6670
7022
|
schemaHash: input.schemaHash,
|
|
6671
7023
|
sourceHashes: {},
|
|
6672
|
-
relatedPageIds:
|
|
7024
|
+
relatedPageIds: uniqueStrings2(["graph:report", ...communityPageIds]),
|
|
6673
7025
|
relatedNodeIds: input.community.nodeIds,
|
|
6674
7026
|
relatedSourceIds,
|
|
6675
7027
|
createdAt: input.metadata.createdAt,
|
|
@@ -6677,11 +7029,11 @@ function buildCommunitySummaryPage(input) {
|
|
|
6677
7029
|
compiledFrom: input.metadata.compiledFrom,
|
|
6678
7030
|
managedBy: input.metadata.managedBy
|
|
6679
7031
|
},
|
|
6680
|
-
content:
|
|
7032
|
+
content: matter5.stringify(body, frontmatter)
|
|
6681
7033
|
};
|
|
6682
7034
|
}
|
|
6683
7035
|
function buildProjectsIndex(projectPages, schemaHash, metadata) {
|
|
6684
|
-
return
|
|
7036
|
+
return matter5.stringify(
|
|
6685
7037
|
[
|
|
6686
7038
|
"# Projects",
|
|
6687
7039
|
"",
|
|
@@ -6711,7 +7063,7 @@ function buildProjectsIndex(projectPages, schemaHash, metadata) {
|
|
|
6711
7063
|
}
|
|
6712
7064
|
function buildProjectIndex(input) {
|
|
6713
7065
|
const title = `Project: ${input.projectId}`;
|
|
6714
|
-
return
|
|
7066
|
+
return matter5.stringify(
|
|
6715
7067
|
[
|
|
6716
7068
|
`# ${title}`,
|
|
6717
7069
|
"",
|
|
@@ -6824,7 +7176,7 @@ function buildOutputPage(input) {
|
|
|
6824
7176
|
outputFormat: input.outputFormat,
|
|
6825
7177
|
outputAssets
|
|
6826
7178
|
},
|
|
6827
|
-
content:
|
|
7179
|
+
content: matter5.stringify(
|
|
6828
7180
|
(input.outputFormat === "slides" ? [
|
|
6829
7181
|
input.answer,
|
|
6830
7182
|
"",
|
|
@@ -6950,7 +7302,7 @@ function buildExploreHubPage(input) {
|
|
|
6950
7302
|
outputFormat: input.outputFormat,
|
|
6951
7303
|
outputAssets
|
|
6952
7304
|
},
|
|
6953
|
-
content:
|
|
7305
|
+
content: matter5.stringify(
|
|
6954
7306
|
(input.outputFormat === "slides" ? [
|
|
6955
7307
|
`# ${title}`,
|
|
6956
7308
|
"",
|
|
@@ -7216,12 +7568,12 @@ function buildOutputAssetManifest(input) {
|
|
|
7216
7568
|
// src/outputs.ts
|
|
7217
7569
|
import fs13 from "fs/promises";
|
|
7218
7570
|
import path16 from "path";
|
|
7219
|
-
import
|
|
7571
|
+
import matter7 from "gray-matter";
|
|
7220
7572
|
|
|
7221
7573
|
// src/pages.ts
|
|
7222
7574
|
import fs12 from "fs/promises";
|
|
7223
7575
|
import path15 from "path";
|
|
7224
|
-
import
|
|
7576
|
+
import matter6 from "gray-matter";
|
|
7225
7577
|
/**
 * Coerce an arbitrary value into an array of strings.
 *
 * @param {*} value - Anything; only arrays contribute entries.
 * @returns {string[]} The string members of `value` in their original order,
 *   or an empty array when `value` is not an array.
 */
function normalizeStringArray(value) {
  if (!Array.isArray(value)) {
    return [];
  }
  const strings = [];
  for (const item of value) {
    if (typeof item === "string") {
      strings.push(item);
    }
  }
  return strings;
}
|
|
@@ -7242,6 +7594,9 @@ function normalizePageStatus(value, fallback = "active") {
|
|
|
7242
7594
|
/**
 * Validate a page-manager value.
 *
 * @param {*} value - Candidate manager.
 * @param {*} [fallback="system"] - Returned when `value` is not recognised.
 * @returns {*} `value` when it is exactly "human" or "system", otherwise `fallback`.
 */
function normalizePageManager(value, fallback = "system") {
  const isKnownManager = value === "human" || value === "system";
  if (isKnownManager) {
    return value;
  }
  return fallback;
}
|
|
7597
|
+
/**
 * Validate a source-type value.
 *
 * @param {*} value - Candidate source type.
 * @returns {string|undefined} `value` when it is exactly one of "arxiv", "doi",
 *   "tweet", "article" or "url"; otherwise `undefined`.
 */
function normalizeSourceType(value) {
  switch (value) {
    case "arxiv":
    case "doi":
    case "tweet":
    case "article":
    case "url":
      return value;
    default:
      return void 0;
  }
}
|
|
7245
7600
|
/**
 * Validate an output-format value.
 *
 * "markdown" is accepted as an explicit value alongside "report", "slides",
 * "chart" and "image". Previously it was not recognised, so an explicit
 * "markdown" was silently replaced whenever a caller supplied a different
 * `fallback`. With the default fallback the result is unchanged.
 *
 * @param {*} value - Candidate output format.
 * @param {*} [fallback="markdown"] - Returned when `value` is not recognised.
 * @returns {*} `value` when it is a known format, otherwise `fallback`.
 */
function normalizeOutputFormat(value, fallback = "markdown") {
  switch (value) {
    case "markdown":
    case "report":
    case "slides":
    case "chart":
    case "image":
      return value;
    default:
      return fallback;
  }
}
|
|
@@ -7293,7 +7648,7 @@ async function loadExistingManagedPageState(absolutePath, defaults = {}) {
|
|
|
7293
7648
|
};
|
|
7294
7649
|
}
|
|
7295
7650
|
const content = await fs12.readFile(absolutePath, "utf8");
|
|
7296
|
-
const parsed =
|
|
7651
|
+
const parsed = matter6(content);
|
|
7297
7652
|
return {
|
|
7298
7653
|
status: normalizePageStatus(parsed.data.status, defaults.status ?? "active"),
|
|
7299
7654
|
managedBy: normalizePageManager(parsed.data.managed_by, defaults.managedBy ?? "system"),
|
|
@@ -7327,7 +7682,7 @@ function inferPageKind(relativePath, explicitKind = void 0) {
|
|
|
7327
7682
|
return "index";
|
|
7328
7683
|
}
|
|
7329
7684
|
function parseStoredPage(relativePath, content, defaults = {}) {
|
|
7330
|
-
const parsed =
|
|
7685
|
+
const parsed = matter6(content);
|
|
7331
7686
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
7332
7687
|
const fallbackCreatedAt = defaults.createdAt ?? now;
|
|
7333
7688
|
const fallbackUpdatedAt = defaults.updatedAt ?? fallbackCreatedAt;
|
|
@@ -7347,6 +7702,7 @@ function parseStoredPage(relativePath, content, defaults = {}) {
|
|
|
7347
7702
|
path: relativePath,
|
|
7348
7703
|
title,
|
|
7349
7704
|
kind,
|
|
7705
|
+
sourceType: normalizeSourceType(parsed.data.source_type),
|
|
7350
7706
|
sourceIds,
|
|
7351
7707
|
projectIds,
|
|
7352
7708
|
nodeIds,
|
|
@@ -7379,7 +7735,7 @@ async function loadInsightPages(wikiDir) {
|
|
|
7379
7735
|
for (const absolutePath of files) {
|
|
7380
7736
|
const relativePath = toPosix(path15.relative(wikiDir, absolutePath));
|
|
7381
7737
|
const content = await fs12.readFile(absolutePath, "utf8");
|
|
7382
|
-
const parsed =
|
|
7738
|
+
const parsed = matter6(content);
|
|
7383
7739
|
const stats = await fs12.stat(absolutePath);
|
|
7384
7740
|
const title = typeof parsed.data.title === "string" ? parsed.data.title : path15.basename(absolutePath, ".md");
|
|
7385
7741
|
const sourceIds = normalizeStringArray(parsed.data.source_ids);
|
|
@@ -7464,7 +7820,7 @@ async function loadSavedOutputPages(wikiDir) {
|
|
|
7464
7820
|
const relativePath = path16.posix.join("outputs", entry.name);
|
|
7465
7821
|
const absolutePath = path16.join(outputsDir, entry.name);
|
|
7466
7822
|
const content = await fs13.readFile(absolutePath, "utf8");
|
|
7467
|
-
const parsed =
|
|
7823
|
+
const parsed = matter7(content);
|
|
7468
7824
|
const slug = entry.name.replace(/\.md$/, "");
|
|
7469
7825
|
const title = typeof parsed.data.title === "string" ? parsed.data.title : slug;
|
|
7470
7826
|
const pageId = typeof parsed.data.page_id === "string" ? parsed.data.page_id : `output:${slug}`;
|
|
@@ -7516,7 +7872,7 @@ async function loadSavedOutputPages(wikiDir) {
|
|
|
7516
7872
|
// src/search.ts
|
|
7517
7873
|
import fs14 from "fs/promises";
|
|
7518
7874
|
import path17 from "path";
|
|
7519
|
-
import
|
|
7875
|
+
import matter8 from "gray-matter";
|
|
7520
7876
|
function getDatabaseSync() {
|
|
7521
7877
|
const builtin = process.getBuiltinModule?.("node:sqlite");
|
|
7522
7878
|
if (!builtin?.DatabaseSync) {
|
|
@@ -7534,6 +7890,9 @@ function normalizeKind(value) {
|
|
|
7534
7890
|
/**
 * Validate a page-status value.
 *
 * @param {*} value - Candidate status.
 * @returns {string|undefined} `value` when it is exactly "draft", "candidate",
 *   "active" or "archived"; otherwise `undefined`.
 */
function normalizeStatus(value) {
  switch (value) {
    case "draft":
    case "candidate":
    case "active":
    case "archived":
      return value;
    default:
      return void 0;
  }
}
|
|
7893
|
+
/**
 * Validate a source-type value read back from a search row.
 *
 * @param {*} value - Candidate source type.
 * @returns {string|undefined} `value` when it is exactly one of "arxiv", "doi",
 *   "tweet", "article" or "url"; otherwise `undefined`.
 */
function normalizeSourceType2(value) {
  const recognised = ["arxiv", "doi", "tweet", "article", "url"];
  if (!recognised.includes(value)) {
    return void 0;
  }
  return value;
}
|
|
7537
7896
|
async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
7538
7897
|
await ensureDir(path17.dirname(dbPath));
|
|
7539
7898
|
const DatabaseSync = getDatabaseSync();
|
|
@@ -7549,6 +7908,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
7549
7908
|
body TEXT NOT NULL,
|
|
7550
7909
|
kind TEXT NOT NULL,
|
|
7551
7910
|
status TEXT NOT NULL,
|
|
7911
|
+
source_type TEXT NOT NULL,
|
|
7552
7912
|
project_ids TEXT NOT NULL,
|
|
7553
7913
|
project_key TEXT NOT NULL
|
|
7554
7914
|
);
|
|
@@ -7562,12 +7922,12 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
7562
7922
|
DELETE FROM pages;
|
|
7563
7923
|
`);
|
|
7564
7924
|
const insertPage = db.prepare(
|
|
7565
|
-
"INSERT INTO pages (id, path, title, body, kind, status, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
|
|
7925
|
+
"INSERT INTO pages (id, path, title, body, kind, status, source_type, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
|
7566
7926
|
);
|
|
7567
7927
|
for (const page of pages) {
|
|
7568
7928
|
const absolutePath = path17.join(wikiDir, page.path);
|
|
7569
7929
|
const content = await fs14.readFile(absolutePath, "utf8");
|
|
7570
|
-
const parsed =
|
|
7930
|
+
const parsed = matter8(content);
|
|
7571
7931
|
insertPage.run(
|
|
7572
7932
|
page.id,
|
|
7573
7933
|
page.path,
|
|
@@ -7575,6 +7935,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
7575
7935
|
parsed.content,
|
|
7576
7936
|
page.kind,
|
|
7577
7937
|
page.status,
|
|
7938
|
+
typeof parsed.data.source_type === "string" ? parsed.data.source_type : "",
|
|
7578
7939
|
JSON.stringify(page.projectIds),
|
|
7579
7940
|
page.projectIds.map((projectId) => `|${projectId}|`).join("")
|
|
7580
7941
|
);
|
|
@@ -7608,6 +7969,10 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
|
|
|
7608
7969
|
params.push(`%|${options.project}|%`);
|
|
7609
7970
|
}
|
|
7610
7971
|
}
|
|
7972
|
+
if (options.sourceType && options.sourceType !== "all") {
|
|
7973
|
+
clauses.push("pages.source_type = ?");
|
|
7974
|
+
params.push(options.sourceType);
|
|
7975
|
+
}
|
|
7611
7976
|
const statement = db.prepare(`
|
|
7612
7977
|
SELECT
|
|
7613
7978
|
pages.id AS pageId,
|
|
@@ -7615,6 +7980,7 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
|
|
|
7615
7980
|
pages.title AS title,
|
|
7616
7981
|
pages.kind AS kind,
|
|
7617
7982
|
pages.status AS status,
|
|
7983
|
+
pages.source_type AS sourceType,
|
|
7618
7984
|
pages.project_ids AS projectIds,
|
|
7619
7985
|
snippet(page_search, 1, '[', ']', '...', 16) AS snippet,
|
|
7620
7986
|
bm25(page_search) AS rank
|
|
@@ -7642,13 +8008,14 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
|
|
|
7642
8008
|
title: String(row.title ?? ""),
|
|
7643
8009
|
kind: normalizeKind(row.kind),
|
|
7644
8010
|
status: normalizeStatus(row.status),
|
|
8011
|
+
sourceType: normalizeSourceType2(row.sourceType),
|
|
7645
8012
|
snippet: String(row.snippet ?? ""),
|
|
7646
8013
|
rank: Number(row.rank ?? 0)
|
|
7647
8014
|
}));
|
|
7648
8015
|
}
|
|
7649
8016
|
|
|
7650
8017
|
// src/vault.ts
|
|
7651
|
-
function
|
|
8018
|
+
/**
 * Discard falsy entries from `values` and de-duplicate the rest through
 * `uniqueBy`, using each value as its own key.
 *
 * @param {Array} values - Candidate values; falsy entries are discarded.
 * @returns {Array} Whatever `uniqueBy` returns for the truthy entries.
 */
function uniqueStrings3(values) {
  const present = values.filter((candidate) => Boolean(candidate));
  const keyOf = (candidate) => candidate;
  return uniqueBy(present, keyOf);
}
|
|
7654
8021
|
function normalizeOutputFormat2(format) {
|
|
@@ -7809,7 +8176,7 @@ async function resolveImageGenerationProvider(rootDir) {
|
|
|
7809
8176
|
if (!providerConfig) {
|
|
7810
8177
|
throw new Error(`No provider configured with id "${preferredProviderId}" for task "imageProvider".`);
|
|
7811
8178
|
}
|
|
7812
|
-
const { createProvider: createProvider2 } = await import("./registry-
|
|
8179
|
+
const { createProvider: createProvider2 } = await import("./registry-6KZMA3XM.js");
|
|
7813
8180
|
return createProvider2(preferredProviderId, providerConfig, rootDir);
|
|
7814
8181
|
}
|
|
7815
8182
|
async function generateOutputArtifacts(rootDir, input) {
|
|
@@ -8013,7 +8380,7 @@ function normalizeProjectRoot(root) {
|
|
|
8013
8380
|
/**
 * List the projects declared in `config.projects`, ordered by project id.
 *
 * Each entry carries the project id, its roots after `normalizeProjectRoot`,
 * de-duplication and removal of falsy results, and the project's `schemaPath`.
 *
 * @param {object} config - Configuration whose optional `projects` map is read.
 * @returns {Array<{id: string, roots: Array, schemaPath: *}>} Entries sorted with
 *   `localeCompare` on `id`; empty when no projects are configured.
 */
function projectEntries(config) {
  const entries = [];
  for (const [id, project] of Object.entries(config.projects ?? {})) {
    const normalizedRoots = project.roots.map(normalizeProjectRoot);
    entries.push({
      id,
      roots: uniqueStrings3(normalizedRoots).filter(Boolean),
      schemaPath: project.schemaPath
    });
  }
  entries.sort((left, right) => left.id.localeCompare(right.id));
  return entries;
}
|
|
@@ -8061,11 +8428,11 @@ function resolveSourceProjects(rootDir, manifests, config) {
|
|
|
8061
8428
|
return Object.fromEntries(manifests.map((manifest) => [manifest.sourceId, resolveSourceProjectId(rootDir, manifest, config)]));
|
|
8062
8429
|
}
|
|
8063
8430
|
/**
 * Determine the single project that a set of sources belongs to.
 *
 * @param {Array} sourceIds - Source ids to look up.
 * @param {object} sourceProjects - Map from source id to project id; missing
 *   or falsy entries are ignored.
 * @returns {Array} A one-element list with the project id when all mapped
 *   sources share exactly one project, otherwise an empty list.
 */
function scopedProjectIdsFromSources(sourceIds, sourceProjects) {
  const mappedProjects = sourceIds
    .map((sourceId) => sourceProjects[sourceId] ?? "")
    .filter(Boolean);
  const distinctProjects = uniqueStrings3(mappedProjects);
  if (distinctProjects.length !== 1) {
    return [];
  }
  return distinctProjects;
}
|
|
8067
8434
|
/**
 * Collect the project ids attached to the given pages.
 *
 * @param {Array} pageIds - Ids of the pages to inspect.
 * @param {Map} pageMap2 - Lookup from page id to page; pages that are missing
 *   or have no `projectIds` contribute nothing.
 * @returns {Array} Truthy project ids, sorted with `localeCompare` and then
 *   de-duplicated by `uniqueStrings3`.
 */
function schemaProjectIdsFromPages(pageIds, pageMap2) {
  const collected = pageIds.flatMap((pageId) => pageMap2.get(pageId)?.projectIds ?? []);
  const ordered = collected.filter(Boolean);
  ordered.sort((left, right) => left.localeCompare(right));
  return uniqueStrings3(ordered);
}
|
|
@@ -8074,7 +8441,7 @@ function categoryTagsForSchema(schema, texts) {
|
|
|
8074
8441
|
if (!haystack) {
|
|
8075
8442
|
return [];
|
|
8076
8443
|
}
|
|
8077
|
-
return
|
|
8444
|
+
return uniqueStrings3(
|
|
8078
8445
|
schemaCategoryLabels({ path: "", hash: "", content: schema.content }).filter((label) => haystack.includes(label.toLowerCase())).map((label) => `category/${slugify(label)}`)
|
|
8079
8446
|
).slice(0, 3);
|
|
8080
8447
|
}
|
|
@@ -8285,7 +8652,7 @@ async function buildManagedContent(absolutePath, defaults, build) {
|
|
|
8285
8652
|
return content;
|
|
8286
8653
|
}
|
|
8287
8654
|
/**
 * Gather the source ids behind a set of pages.
 *
 * @param {Array<{sourceIds: Array}>} pages - Pages whose `sourceIds` are merged.
 * @returns {Array} The concatenated source ids after `uniqueStrings3`.
 */
function indexCompiledFrom(pages) {
  const allSourceIds = pages.flatMap((page) => page.sourceIds);
  return uniqueStrings3(allSourceIds);
}
|
|
8290
8657
|
function deriveGraphMetrics(nodes, edges) {
|
|
8291
8658
|
const adjacency = /* @__PURE__ */ new Map();
|
|
@@ -8688,7 +9055,19 @@ function buildGraph(manifests, analyses, pages, sourceProjects, _codeIndex) {
|
|
|
8688
9055
|
pages
|
|
8689
9056
|
};
|
|
8690
9057
|
}
|
|
8691
|
-
|
|
9058
|
+
/**
 * Select the most recently updated research source pages of a graph.
 *
 * A page qualifies when its `kind` is "source" and it has a `sourceType`
 * other than "url". When `previousCompiledAt` parses to a valid date, only
 * pages updated strictly after that instant are kept; otherwise every
 * qualifying page is kept.
 *
 * Results are ordered newest first by the parsed `updatedAt` instant, the same
 * value the cutoff filter uses. Comparing the raw strings alone misorders
 * timestamps that carry different UTC offsets or precisions. Ties fall back to
 * the raw `updatedAt` string (descending) and then to `title` (ascending).
 * Pages whose `updatedAt` cannot be parsed sort last.
 *
 * @param {object} graph - Graph whose `pages` list is scanned.
 * @param {string} [previousCompiledAt] - Timestamp of the previous compile.
 * @returns {Array<{id: *, path: *, title: *, updatedAt: *, sourceType: *}>}
 *   At most eight page summaries.
 */
function recentResearchSourcePages(graph, previousCompiledAt) {
  const previousTimestamp = previousCompiledAt ? Date.parse(previousCompiledAt) : Number.NaN;
  const hasCutoff = !Number.isNaN(previousTimestamp);

  const candidates = [];
  for (const page of graph.pages) {
    const isResearchSource = page.kind === "source" && Boolean(page.sourceType) && page.sourceType !== "url";
    if (!isResearchSource) {
      continue;
    }
    const parsedUpdatedAt = Date.parse(page.updatedAt);
    // A NaN timestamp never compares greater, so unparseable dates are
    // dropped whenever a cutoff is in effect.
    if (hasCutoff && !(parsedUpdatedAt > previousTimestamp)) {
      continue;
    }
    candidates.push({
      page,
      // Unparseable dates sort after every real instant.
      updatedTimestamp: Number.isNaN(parsedUpdatedAt) ? Number.NEGATIVE_INFINITY : parsedUpdatedAt
    });
  }

  candidates.sort((left, right) => {
    if (left.updatedTimestamp !== right.updatedTimestamp) {
      return left.updatedTimestamp > right.updatedTimestamp ? -1 : 1;
    }
    return right.page.updatedAt.localeCompare(left.page.updatedAt) || left.page.title.localeCompare(right.page.title);
  });

  return candidates.slice(0, 8).map(({ page }) => ({
    id: page.id,
    path: page.path,
    title: page.title,
    updatedAt: page.updatedAt,
    sourceType: page.sourceType
  }));
}
|
|
9070
|
+
async function buildGraphOrientationPages(graph, paths, schemaHash, previousCompiledAt) {
|
|
8692
9071
|
const benchmark = await readJsonFile(paths.benchmarkPath);
|
|
8693
9072
|
const communityRecords = [];
|
|
8694
9073
|
for (const community of graph.communities ?? []) {
|
|
@@ -8698,7 +9077,7 @@ async function buildGraphOrientationPages(graph, paths, schemaHash) {
|
|
|
8698
9077
|
absolutePath,
|
|
8699
9078
|
{
|
|
8700
9079
|
managedBy: "system",
|
|
8701
|
-
compiledFrom:
|
|
9080
|
+
compiledFrom: uniqueStrings3(
|
|
8702
9081
|
community.nodeIds.flatMap((nodeId) => graph.nodes.find((node) => node.id === nodeId)?.sourceIds ?? [])
|
|
8703
9082
|
),
|
|
8704
9083
|
confidence: 1
|
|
@@ -8712,23 +9091,33 @@ async function buildGraphOrientationPages(graph, paths, schemaHash) {
|
|
|
8712
9091
|
)
|
|
8713
9092
|
);
|
|
8714
9093
|
}
|
|
9094
|
+
const report = buildGraphReportArtifact({
|
|
9095
|
+
graph,
|
|
9096
|
+
communityPages: communityRecords.map((record) => record.page),
|
|
9097
|
+
benchmark,
|
|
9098
|
+
benchmarkStale: benchmark ? benchmark.graphHash !== graphHash(graph) : false,
|
|
9099
|
+
recentResearchSources: recentResearchSourcePages(graph, previousCompiledAt),
|
|
9100
|
+
graphHash: graphHash(graph)
|
|
9101
|
+
});
|
|
8715
9102
|
const reportAbsolutePath = path18.join(paths.wikiDir, "graph", "report.md");
|
|
8716
9103
|
const reportRecord = await buildManagedGraphPage(
|
|
8717
9104
|
reportAbsolutePath,
|
|
8718
9105
|
{
|
|
8719
9106
|
managedBy: "system",
|
|
8720
|
-
compiledFrom:
|
|
9107
|
+
compiledFrom: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
|
|
8721
9108
|
confidence: 1
|
|
8722
9109
|
},
|
|
8723
9110
|
(metadata) => buildGraphReportPage({
|
|
8724
9111
|
graph,
|
|
8725
9112
|
schemaHash,
|
|
8726
9113
|
metadata,
|
|
8727
|
-
|
|
8728
|
-
benchmark
|
|
9114
|
+
report
|
|
8729
9115
|
})
|
|
8730
9116
|
);
|
|
8731
|
-
return
|
|
9117
|
+
return {
|
|
9118
|
+
records: [reportRecord, ...communityRecords],
|
|
9119
|
+
report
|
|
9120
|
+
};
|
|
8732
9121
|
}
|
|
8733
9122
|
async function writePage(wikiDir, relativePath, content, changedPages) {
|
|
8734
9123
|
const absolutePath = path18.resolve(wikiDir, relativePath);
|
|
@@ -9025,7 +9414,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9025
9414
|
const itemKind = kind === "concepts" ? "concept" : "entity";
|
|
9026
9415
|
const slug = slugify(aggregate.name);
|
|
9027
9416
|
const pageId = `${itemKind}:${slug}`;
|
|
9028
|
-
const sourceIds =
|
|
9417
|
+
const sourceIds = uniqueStrings3(aggregate.sourceAnalyses.map((item) => item.sourceId));
|
|
9029
9418
|
const projectIds = scopedProjectIdsFromSources(sourceIds, input.sourceProjects);
|
|
9030
9419
|
const schemaHash = effectiveHashForProject(input.schemas, projectIds[0] ?? null);
|
|
9031
9420
|
const previousEntry = input.previousState?.candidateHistory?.[pageId];
|
|
@@ -9091,9 +9480,9 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9091
9480
|
const compiledPages = records.map((record) => record.page);
|
|
9092
9481
|
const basePages = [...compiledPages, ...input.outputPages, ...input.insightPages];
|
|
9093
9482
|
const baseGraph = buildGraph(input.manifests, input.analyses, basePages, input.sourceProjects, input.codeIndex);
|
|
9094
|
-
const
|
|
9095
|
-
records.push(...
|
|
9096
|
-
const allPages = [...basePages, ...
|
|
9483
|
+
const graphOrientation = await buildGraphOrientationPages(baseGraph, paths, globalSchemaHash, input.previousState?.generatedAt);
|
|
9484
|
+
records.push(...graphOrientation.records);
|
|
9485
|
+
const allPages = [...basePages, ...graphOrientation.records.map((record) => record.page)];
|
|
9097
9486
|
const graph = {
|
|
9098
9487
|
...baseGraph,
|
|
9099
9488
|
pages: allPages
|
|
@@ -9226,7 +9615,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9226
9615
|
const nextPagePaths = new Set(records.map((record) => record.page.path));
|
|
9227
9616
|
const obsoleteGraphPaths = (previousGraph?.pages ?? []).filter((page) => page.kind !== "output" && page.kind !== "insight").map((page) => page.path).filter((relativePath) => !nextPagePaths.has(relativePath));
|
|
9228
9617
|
const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath))).filter((relativePath) => !nextPagePaths.has(relativePath));
|
|
9229
|
-
const obsoletePaths =
|
|
9618
|
+
const obsoletePaths = uniqueStrings3([...obsoleteGraphPaths, ...existingProjectIndexPaths]);
|
|
9230
9619
|
const changedFiles = [];
|
|
9231
9620
|
for (const record of records) {
|
|
9232
9621
|
const absolutePath = path18.join(paths.wikiDir, record.page.path);
|
|
@@ -9258,6 +9647,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9258
9647
|
await fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true });
|
|
9259
9648
|
}
|
|
9260
9649
|
await writeJsonFile(paths.graphPath, graph);
|
|
9650
|
+
await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
|
|
9261
9651
|
await writeJsonFile(paths.codeIndexPath, input.codeIndex);
|
|
9262
9652
|
await writeJsonFile(paths.compileStatePath, {
|
|
9263
9653
|
generatedAt: graph.generatedAt,
|
|
@@ -9283,7 +9673,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9283
9673
|
return {
|
|
9284
9674
|
graph,
|
|
9285
9675
|
allPages,
|
|
9286
|
-
changedPages:
|
|
9676
|
+
changedPages: uniqueStrings3([...changedPages, ...writeChanges]),
|
|
9287
9677
|
promotedPageIds,
|
|
9288
9678
|
candidatePageCount: candidatePages.length,
|
|
9289
9679
|
staged: false
|
|
@@ -9292,18 +9682,20 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9292
9682
|
async function refreshIndexesAndSearch(rootDir, pages) {
|
|
9293
9683
|
const { config, paths } = await loadVaultConfig(rootDir);
|
|
9294
9684
|
const schemas = await loadVaultSchemas(rootDir);
|
|
9685
|
+
const compileState = await readJsonFile(paths.compileStatePath);
|
|
9295
9686
|
const globalSchemaHash = schemas.effective.global.hash;
|
|
9296
9687
|
const currentGraph = await readJsonFile(paths.graphPath);
|
|
9297
9688
|
const basePages = pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary");
|
|
9298
|
-
const
|
|
9689
|
+
const graphOrientation = currentGraph ? await buildGraphOrientationPages(
|
|
9299
9690
|
{
|
|
9300
9691
|
...currentGraph,
|
|
9301
9692
|
pages: basePages
|
|
9302
9693
|
},
|
|
9303
9694
|
paths,
|
|
9304
|
-
globalSchemaHash
|
|
9305
|
-
|
|
9306
|
-
|
|
9695
|
+
globalSchemaHash,
|
|
9696
|
+
compileState?.generatedAt
|
|
9697
|
+
) : { records: [], report: null };
|
|
9698
|
+
const pagesWithGraph = sortGraphPages([...basePages, ...graphOrientation.records.map((record) => record.page)]);
|
|
9307
9699
|
if (currentGraph) {
|
|
9308
9700
|
await writeJsonFile(paths.graphPath, {
|
|
9309
9701
|
...currentGraph,
|
|
@@ -9409,9 +9801,12 @@ async function refreshIndexesAndSearch(rootDir, pages) {
|
|
|
9409
9801
|
)
|
|
9410
9802
|
);
|
|
9411
9803
|
}
|
|
9412
|
-
for (const record of
|
|
9804
|
+
for (const record of graphOrientation.records) {
|
|
9413
9805
|
await writeFileIfChanged(path18.join(paths.wikiDir, record.page.path), record.content);
|
|
9414
9806
|
}
|
|
9807
|
+
if (graphOrientation.report) {
|
|
9808
|
+
await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
|
|
9809
|
+
}
|
|
9415
9810
|
const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
|
|
9416
9811
|
const allowedProjectIndexPaths = /* @__PURE__ */ new Set([
|
|
9417
9812
|
"projects/index.md",
|
|
@@ -9421,7 +9816,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
|
|
|
9421
9816
|
existingProjectIndexPaths.filter((relativePath) => !allowedProjectIndexPaths.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
|
|
9422
9817
|
);
|
|
9423
9818
|
const existingGraphPages = (await listFilesRecursive(path18.join(paths.wikiDir, "graph").replace(/\/$/, "")).catch(() => [])).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
|
|
9424
|
-
const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...
|
|
9819
|
+
const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...graphOrientation.records.map((record) => record.page.path)]);
|
|
9425
9820
|
await Promise.all(
|
|
9426
9821
|
existingGraphPages.filter((relativePath) => !allowedGraphPages.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
|
|
9427
9822
|
);
|
|
@@ -9438,7 +9833,7 @@ async function prepareOutputPageSave(rootDir, input) {
|
|
|
9438
9833
|
status: "active",
|
|
9439
9834
|
createdAt: now,
|
|
9440
9835
|
updatedAt: now,
|
|
9441
|
-
compiledFrom:
|
|
9836
|
+
compiledFrom: uniqueStrings3(input.relatedSourceIds ?? input.citations),
|
|
9442
9837
|
managedBy: "system",
|
|
9443
9838
|
confidence: 0.74
|
|
9444
9839
|
}
|
|
@@ -9479,7 +9874,7 @@ async function prepareExploreHubSave(rootDir, input) {
|
|
|
9479
9874
|
status: "active",
|
|
9480
9875
|
createdAt: now,
|
|
9481
9876
|
updatedAt: now,
|
|
9482
|
-
compiledFrom:
|
|
9877
|
+
compiledFrom: uniqueStrings3(input.citations),
|
|
9483
9878
|
managedBy: "system",
|
|
9484
9879
|
confidence: 0.76
|
|
9485
9880
|
}
|
|
@@ -9577,7 +9972,7 @@ async function executeQuery(rootDir, question, format) {
|
|
|
9577
9972
|
const absolutePath = path18.join(paths.wikiDir, result.path);
|
|
9578
9973
|
try {
|
|
9579
9974
|
const content = await fs15.readFile(absolutePath, "utf8");
|
|
9580
|
-
const parsed =
|
|
9975
|
+
const parsed = matter9(content);
|
|
9581
9976
|
return `# ${result.title}
|
|
9582
9977
|
${truncate(normalizeWhitespace(parsed.content), 1200)}`;
|
|
9583
9978
|
} catch {
|
|
@@ -9927,13 +10322,13 @@ async function promoteCandidate(rootDir, target) {
|
|
|
9927
10322
|
const graph = await readJsonFile(paths.graphPath);
|
|
9928
10323
|
const candidate = resolveCandidateTarget(graph?.pages ?? [], target);
|
|
9929
10324
|
const raw = await fs15.readFile(path18.join(paths.wikiDir, candidate.path), "utf8");
|
|
9930
|
-
const parsed =
|
|
10325
|
+
const parsed = matter9(raw);
|
|
9931
10326
|
const nextUpdatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
9932
|
-
const nextContent =
|
|
10327
|
+
const nextContent = matter9.stringify(parsed.content, {
|
|
9933
10328
|
...parsed.data,
|
|
9934
10329
|
status: "active",
|
|
9935
10330
|
updated_at: nextUpdatedAt,
|
|
9936
|
-
tags:
|
|
10331
|
+
tags: uniqueStrings3([candidate.kind, ...Array.isArray(parsed.data.tags) ? parsed.data.tags : []]).filter(
|
|
9937
10332
|
(tag) => tag !== "candidate"
|
|
9938
10333
|
)
|
|
9939
10334
|
});
|
|
@@ -10075,7 +10470,7 @@ async function initVault(rootDir, options = {}) {
|
|
|
10075
10470
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
10076
10471
|
await writeFileIfChanged(
|
|
10077
10472
|
insightsIndexPath,
|
|
10078
|
-
|
|
10473
|
+
matter9.stringify(
|
|
10079
10474
|
[
|
|
10080
10475
|
"# Insights",
|
|
10081
10476
|
"",
|
|
@@ -10108,7 +10503,7 @@ async function initVault(rootDir, options = {}) {
|
|
|
10108
10503
|
);
|
|
10109
10504
|
await writeFileIfChanged(
|
|
10110
10505
|
path18.join(paths.wikiDir, "projects", "index.md"),
|
|
10111
|
-
|
|
10506
|
+
matter9.stringify(["# Projects", "", "- Run `swarmvault compile` to build project rollups.", ""].join("\n"), {
|
|
10112
10507
|
page_id: "projects:index",
|
|
10113
10508
|
kind: "index",
|
|
10114
10509
|
title: "Projects",
|
|
@@ -10130,7 +10525,7 @@ async function initVault(rootDir, options = {}) {
|
|
|
10130
10525
|
);
|
|
10131
10526
|
await writeFileIfChanged(
|
|
10132
10527
|
path18.join(paths.wikiDir, "candidates", "index.md"),
|
|
10133
|
-
|
|
10528
|
+
matter9.stringify(["# Candidates", "", "- Run `swarmvault compile` to stage candidate pages.", ""].join("\n"), {
|
|
10134
10529
|
page_id: "candidates:index",
|
|
10135
10530
|
kind: "index",
|
|
10136
10531
|
title: "Candidates",
|
|
@@ -10154,6 +10549,20 @@ async function initVault(rootDir, options = {}) {
|
|
|
10154
10549
|
await ensureObsidianWorkspace(rootDir);
|
|
10155
10550
|
}
|
|
10156
10551
|
}
|
|
10552
|
+
async function runConfiguredBenchmark(rootDir, config) {
|
|
10553
|
+
if (config.benchmark?.enabled === false) {
|
|
10554
|
+
return { ok: true };
|
|
10555
|
+
}
|
|
10556
|
+
try {
|
|
10557
|
+
await benchmarkVault(rootDir);
|
|
10558
|
+
return { ok: true };
|
|
10559
|
+
} catch (error) {
|
|
10560
|
+
return {
|
|
10561
|
+
ok: false,
|
|
10562
|
+
error: error instanceof Error ? error.message : String(error)
|
|
10563
|
+
};
|
|
10564
|
+
}
|
|
10565
|
+
}
|
|
10157
10566
|
async function compileVault(rootDir, options = {}) {
|
|
10158
10567
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
10159
10568
|
const { config, paths } = await initWorkspace(rootDir);
|
|
@@ -10169,7 +10578,7 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10169
10578
|
const currentInsightHashes = pageHashes(storedInsightPages);
|
|
10170
10579
|
const previousState = await readJsonFile(paths.compileStatePath);
|
|
10171
10580
|
const rootSchemaChanged = !previousState || previousState.rootSchemaHash !== schemas.root.hash;
|
|
10172
|
-
const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash ||
|
|
10581
|
+
const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash || uniqueStrings3([...Object.keys(previousState?.effectiveSchemaHashes?.projects ?? {}), ...Object.keys(schemas.effective.projects)]).some(
|
|
10173
10582
|
(projectId) => previousProjectSchemaHash(previousState, projectId) !== effectiveHashForProject(schemas, projectId)
|
|
10174
10583
|
);
|
|
10175
10584
|
const nextProjectConfigHash = projectConfigHash(config);
|
|
@@ -10202,6 +10611,10 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10202
10611
|
}
|
|
10203
10612
|
if (dirty.length === 0 && !rootSchemaChanged && !effectiveSchemaChanged && !projectConfigChanged && !sourcesChanged && !outputsChanged && !insightsChanged && !pendingCandidatePromotion && artifactsExist && !options.approve) {
|
|
10204
10613
|
const graph = await readJsonFile(paths.graphPath);
|
|
10614
|
+
const benchmark2 = await runConfiguredBenchmark(rootDir, config);
|
|
10615
|
+
if (graph && benchmark2.ok) {
|
|
10616
|
+
await refreshIndexesAndSearch(rootDir, graph.pages);
|
|
10617
|
+
}
|
|
10205
10618
|
await recordSession(rootDir, {
|
|
10206
10619
|
operation: "compile",
|
|
10207
10620
|
title: `Compiled ${manifests.length} source(s)`,
|
|
@@ -10219,7 +10632,8 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10219
10632
|
`clean=${manifests.length}`,
|
|
10220
10633
|
`outputs=${outputPages.length}`,
|
|
10221
10634
|
`insights=${insightPages.length}`,
|
|
10222
|
-
`schema=${schemas.effective.global.hash.slice(0, 12)}
|
|
10635
|
+
`schema=${schemas.effective.global.hash.slice(0, 12)}`,
|
|
10636
|
+
`benchmark=${benchmark2.ok ? "ok" : `error:${benchmark2.error}`}`
|
|
10223
10637
|
]
|
|
10224
10638
|
});
|
|
10225
10639
|
return {
|
|
@@ -10337,6 +10751,10 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10337
10751
|
postPassApprovalDir = staged.approvalDir;
|
|
10338
10752
|
}
|
|
10339
10753
|
}
|
|
10754
|
+
const benchmark = options.approve ? { ok: true } : await runConfiguredBenchmark(rootDir, config);
|
|
10755
|
+
if (!options.approve && benchmark.ok) {
|
|
10756
|
+
await refreshIndexesAndSearch(rootDir, sync.allPages);
|
|
10757
|
+
}
|
|
10340
10758
|
await recordSession(rootDir, {
|
|
10341
10759
|
operation: "compile",
|
|
10342
10760
|
title: `Compiled ${manifests.length} source(s)`,
|
|
@@ -10358,7 +10776,8 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10358
10776
|
`promoted=${sync.promotedPageIds.length}`,
|
|
10359
10777
|
`staged=${sync.staged}`,
|
|
10360
10778
|
`postPassApproval=${postPassApprovalId ?? "none"}`,
|
|
10361
|
-
`schema=${schemas.effective.global.hash.slice(0, 12)}
|
|
10779
|
+
`schema=${schemas.effective.global.hash.slice(0, 12)}`,
|
|
10780
|
+
`benchmark=${benchmark.ok ? "ok" : `error:${benchmark.error}`}`
|
|
10362
10781
|
]
|
|
10363
10782
|
});
|
|
10364
10783
|
return {
|
|
@@ -10628,7 +11047,7 @@ ${orchestrationNotes.join("\n")}
|
|
|
10628
11047
|
citations: allCitations,
|
|
10629
11048
|
format: outputFormat,
|
|
10630
11049
|
relatedPageCount: stepPages.length,
|
|
10631
|
-
relatedNodeCount:
|
|
11050
|
+
relatedNodeCount: uniqueStrings3(stepPages.flatMap((page) => page.nodeIds)).length,
|
|
10632
11051
|
projectId: stepPages[0]?.projectIds[0] ?? null
|
|
10633
11052
|
});
|
|
10634
11053
|
const hubInput = {
|
|
@@ -10638,7 +11057,7 @@ ${orchestrationNotes.join("\n")}
|
|
|
10638
11057
|
citations: allCitations,
|
|
10639
11058
|
schemaHash: composeVaultSchema(
|
|
10640
11059
|
schemas.root,
|
|
10641
|
-
|
|
11060
|
+
uniqueStrings3(stepPages.flatMap((page) => page.projectIds).sort((left, right) => left.localeCompare(right))).map((projectId) => schemas.projects[projectId]).filter((schema) => Boolean(schema?.hash))
|
|
10642
11061
|
).hash,
|
|
10643
11062
|
outputFormat,
|
|
10644
11063
|
outputAssets: hubAssetBundle.outputAssets,
|
|
@@ -10698,7 +11117,7 @@ ${orchestrationNotes.join("\n")}
|
|
|
10698
11117
|
providerId: provider.id,
|
|
10699
11118
|
success: true,
|
|
10700
11119
|
relatedSourceIds: [...relatedSourceIds],
|
|
10701
|
-
relatedPageIds:
|
|
11120
|
+
relatedPageIds: uniqueStrings3([...relatedPageIds, ...stepPages.map((page) => page.id), hubPage.id]),
|
|
10702
11121
|
relatedNodeIds: [...relatedNodeIds],
|
|
10703
11122
|
citations: allCitations,
|
|
10704
11123
|
tokenUsage: tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0 ? {
|
|
@@ -10753,7 +11172,7 @@ async function queryGraphVault(rootDir, question, options = {}) {
|
|
|
10753
11172
|
return queryGraph(graph, question, searchResults, options);
|
|
10754
11173
|
}
|
|
10755
11174
|
async function benchmarkVault(rootDir, options = {}) {
|
|
10756
|
-
const { paths } = await loadVaultConfig(rootDir);
|
|
11175
|
+
const { config, paths } = await loadVaultConfig(rootDir);
|
|
10757
11176
|
const graph = await ensureCompiledGraph(rootDir);
|
|
10758
11177
|
const manifests = await listManifests(rootDir);
|
|
10759
11178
|
const pageContentsById = /* @__PURE__ */ new Map();
|
|
@@ -10769,11 +11188,13 @@ async function benchmarkVault(rootDir, options = {}) {
|
|
|
10769
11188
|
if (!await fileExists(absolutePath)) {
|
|
10770
11189
|
continue;
|
|
10771
11190
|
}
|
|
10772
|
-
const parsed =
|
|
11191
|
+
const parsed = matter9(await fs15.readFile(absolutePath, "utf8"));
|
|
10773
11192
|
pageContentsById.set(page.id, parsed.content);
|
|
10774
11193
|
}
|
|
11194
|
+
const configuredQuestions = (config.benchmark?.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
|
|
11195
|
+
const maxQuestions = Math.max(1, options.maxQuestions ?? config.benchmark?.maxQuestions ?? 3);
|
|
10775
11196
|
const questions = (options.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
|
|
10776
|
-
const sampleQuestions = questions.length ? questions :
|
|
11197
|
+
const sampleQuestions = (questions.length ? questions : configuredQuestions.length ? configuredQuestions : defaultBenchmarkQuestionsForGraph(graph, maxQuestions)).slice(0, maxQuestions);
|
|
10777
11198
|
const perQuestion = sampleQuestions.map((question) => {
|
|
10778
11199
|
const searchResults = searchPages(paths.searchDbPath, question, { limit: 12 });
|
|
10779
11200
|
const result = queryGraph(graph, question, searchResults, { budget: 12 });
|
|
@@ -10783,6 +11204,7 @@ async function benchmarkVault(rootDir, options = {}) {
|
|
|
10783
11204
|
queryTokens: metrics.queryTokens,
|
|
10784
11205
|
reduction: metrics.reduction,
|
|
10785
11206
|
visitedNodeIds: result.visitedNodeIds,
|
|
11207
|
+
visitedEdgeIds: result.visitedEdgeIds,
|
|
10786
11208
|
pageIds: result.pageIds
|
|
10787
11209
|
};
|
|
10788
11210
|
});
|
|
@@ -10820,7 +11242,7 @@ async function readPage(rootDir, relativePath) {
|
|
|
10820
11242
|
return null;
|
|
10821
11243
|
}
|
|
10822
11244
|
const raw = await fs15.readFile(absolutePath, "utf8");
|
|
10823
|
-
const parsed =
|
|
11245
|
+
const parsed = matter9(raw);
|
|
10824
11246
|
return {
|
|
10825
11247
|
path: relativePath,
|
|
10826
11248
|
title: typeof parsed.data.title === "string" ? parsed.data.title : path18.basename(relativePath, path18.extname(relativePath)),
|
|
@@ -10947,7 +11369,7 @@ async function lintVault(rootDir, options = {}) {
|
|
|
10947
11369
|
providerId: provider?.id,
|
|
10948
11370
|
success: true,
|
|
10949
11371
|
relatedPageIds: graph.pages.map((page) => page.id),
|
|
10950
|
-
relatedSourceIds:
|
|
11372
|
+
relatedSourceIds: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
|
|
10951
11373
|
lintFindingCount: findings.length,
|
|
10952
11374
|
lines: [`findings=${findings.length}`, `deep=${Boolean(options.deep)}`, `web=${Boolean(options.web)}`]
|
|
10953
11375
|
});
|
|
@@ -11598,7 +12020,7 @@ import fs18 from "fs/promises";
|
|
|
11598
12020
|
import http from "http";
|
|
11599
12021
|
import path22 from "path";
|
|
11600
12022
|
import { promisify } from "util";
|
|
11601
|
-
import
|
|
12023
|
+
import matter10 from "gray-matter";
|
|
11602
12024
|
import mime2 from "mime-types";
|
|
11603
12025
|
|
|
11604
12026
|
// src/watch.ts
|
|
@@ -11999,7 +12421,7 @@ async function readViewerPage(rootDir, relativePath) {
|
|
|
11999
12421
|
return null;
|
|
12000
12422
|
}
|
|
12001
12423
|
const raw = await fs18.readFile(absolutePath, "utf8");
|
|
12002
|
-
const parsed =
|
|
12424
|
+
const parsed = matter10(raw);
|
|
12003
12425
|
return {
|
|
12004
12426
|
path: relativePath,
|
|
12005
12427
|
title: typeof parsed.data.title === "string" ? parsed.data.title : path22.basename(relativePath, path22.extname(relativePath)),
|
|
@@ -12102,16 +12524,29 @@ async function startGraphServer(rootDir, port) {
|
|
|
12102
12524
|
const kind = url.searchParams.get("kind") ?? "all";
|
|
12103
12525
|
const status = url.searchParams.get("status") ?? "all";
|
|
12104
12526
|
const project = url.searchParams.get("project") ?? "all";
|
|
12527
|
+
const sourceType = url.searchParams.get("sourceType") ?? "all";
|
|
12105
12528
|
const results = searchPages(paths.searchDbPath, query, {
|
|
12106
12529
|
limit: Number.isFinite(limit) ? limit : 10,
|
|
12107
12530
|
kind,
|
|
12108
12531
|
status,
|
|
12109
|
-
project
|
|
12532
|
+
project,
|
|
12533
|
+
sourceType
|
|
12110
12534
|
});
|
|
12111
12535
|
response.writeHead(200, { "content-type": "application/json" });
|
|
12112
12536
|
response.end(JSON.stringify(results));
|
|
12113
12537
|
return;
|
|
12114
12538
|
}
|
|
12539
|
+
if (url.pathname === "/api/graph-report") {
|
|
12540
|
+
const reportPath = path22.join(paths.wikiDir, "graph", "report.json");
|
|
12541
|
+
if (!await fileExists(reportPath)) {
|
|
12542
|
+
response.writeHead(404, { "content-type": "application/json" });
|
|
12543
|
+
response.end(JSON.stringify({ error: "Graph report artifact not found. Run `swarmvault compile` first." }));
|
|
12544
|
+
return;
|
|
12545
|
+
}
|
|
12546
|
+
response.writeHead(200, { "content-type": "application/json" });
|
|
12547
|
+
response.end(await fs18.readFile(reportPath, "utf8"));
|
|
12548
|
+
return;
|
|
12549
|
+
}
|
|
12115
12550
|
if (url.pathname === "/api/watch-status") {
|
|
12116
12551
|
response.writeHead(200, { "content-type": "application/json" });
|
|
12117
12552
|
response.end(JSON.stringify(await getWatchStatus(rootDir)));
|
|
@@ -12241,6 +12676,7 @@ async function exportGraphHtml(rootDir, outputPath) {
|
|
|
12241
12676
|
title: loaded.title,
|
|
12242
12677
|
kind: page.kind,
|
|
12243
12678
|
status: page.status,
|
|
12679
|
+
sourceType: page.sourceType,
|
|
12244
12680
|
projectIds: page.projectIds,
|
|
12245
12681
|
content: loaded.content,
|
|
12246
12682
|
assets: await Promise.all(
|
|
@@ -12262,7 +12698,8 @@ async function exportGraphHtml(rootDir, outputPath) {
|
|
|
12262
12698
|
}
|
|
12263
12699
|
const script = await fs18.readFile(scriptPath, "utf8");
|
|
12264
12700
|
const style = stylePath && await fileExists(stylePath) ? await fs18.readFile(stylePath, "utf8") : "";
|
|
12265
|
-
const
|
|
12701
|
+
const report = await readJsonFile(path22.join(paths.wikiDir, "graph", "report.json"));
|
|
12702
|
+
const embeddedData = JSON.stringify({ graph, pages: pages.filter(Boolean), report }, null, 2).replace(/</g, "\\u003c");
|
|
12266
12703
|
const html = [
|
|
12267
12704
|
"<!doctype html>",
|
|
12268
12705
|
'<html lang="en">',
|