@swarmvaultai/engine 0.1.22 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -6
- package/dist/chunk-6UPHDGEB.js +1073 -0
- package/dist/index.d.ts +110 -3
- package/dist/index.js +1170 -160
- package/dist/registry-6KZMA3XM.js +12 -0
- package/dist/viewer/assets/index-CmEm2Pd_.js +330 -0
- package/dist/viewer/index.html +1 -1
- package/dist/viewer/lib.d.ts +71 -2
- package/dist/viewer/lib.js +23 -4
- package/package.json +7 -7
- package/LICENSE +0 -21
- package/dist/viewer/assets/index-DEETVhXx.js +0 -330
package/dist/index.js
CHANGED
|
@@ -21,7 +21,7 @@ import {
|
|
|
21
21
|
uniqueBy,
|
|
22
22
|
writeFileIfChanged,
|
|
23
23
|
writeJsonFile
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-6UPHDGEB.js";
|
|
25
25
|
|
|
26
26
|
// src/agents.ts
|
|
27
27
|
import fs from "fs/promises";
|
|
@@ -192,6 +192,9 @@ function graphPageById(graph) {
|
|
|
192
192
|
function graphNodeById(graph) {
|
|
193
193
|
return new Map(graph.nodes.map((node) => [node.id, node]));
|
|
194
194
|
}
|
|
195
|
+
function exportHyperedgeNodeId(hyperedge) {
|
|
196
|
+
return `hyperedge:${hyperedge.id}`;
|
|
197
|
+
}
|
|
195
198
|
function sortedCommunities(graph) {
|
|
196
199
|
const known = (graph.communities ?? []).map((community) => ({
|
|
197
200
|
...community,
|
|
@@ -356,6 +359,11 @@ function renderGraphMl(graph) {
|
|
|
356
359
|
{ id: "n_community", for: "node", name: "communityId", type: "string" },
|
|
357
360
|
{ id: "n_degree", for: "node", name: "degree", type: "double" },
|
|
358
361
|
{ id: "n_bridge", for: "node", name: "bridgeScore", type: "double" },
|
|
362
|
+
{ id: "n_relation", for: "node", name: "relation", type: "string" },
|
|
363
|
+
{ id: "n_evidence", for: "node", name: "evidenceClass", type: "string" },
|
|
364
|
+
{ id: "n_confidence", for: "node", name: "confidence", type: "double" },
|
|
365
|
+
{ id: "n_source_pages", for: "node", name: "sourcePageIds", type: "string" },
|
|
366
|
+
{ id: "n_why", for: "node", name: "why", type: "string" },
|
|
359
367
|
{ id: "e_relation", for: "edge", name: "relation", type: "string" },
|
|
360
368
|
{ id: "e_status", for: "edge", name: "status", type: "string" },
|
|
361
369
|
{ id: "e_evidence", for: "edge", name: "evidenceClass", type: "string" },
|
|
@@ -394,6 +402,21 @@ function renderGraphMl(graph) {
|
|
|
394
402
|
}
|
|
395
403
|
lines.push(" </node>");
|
|
396
404
|
}
|
|
405
|
+
for (const hyperedge of [...graph.hyperedges ?? []].sort((left, right) => left.id.localeCompare(right.id))) {
|
|
406
|
+
lines.push(` <node id="${xmlEscape(exportHyperedgeNodeId(hyperedge))}">`);
|
|
407
|
+
for (const [key, value] of [
|
|
408
|
+
["n_label", hyperedge.label],
|
|
409
|
+
["n_type", "hyperedge"],
|
|
410
|
+
["n_relation", hyperedge.relation],
|
|
411
|
+
["n_evidence", hyperedge.evidenceClass],
|
|
412
|
+
["n_confidence", hyperedge.confidence],
|
|
413
|
+
["n_source_pages", hyperedge.sourcePageIds],
|
|
414
|
+
["n_why", hyperedge.why]
|
|
415
|
+
]) {
|
|
416
|
+
lines.push(` <data key="${key}">${xmlEscape(graphMlData(value))}</data>`);
|
|
417
|
+
}
|
|
418
|
+
lines.push(" </node>");
|
|
419
|
+
}
|
|
397
420
|
for (const edge of [...graph.edges].sort((left, right) => left.id.localeCompare(right.id))) {
|
|
398
421
|
lines.push(` <edge id="${xmlEscape(edge.id)}" source="${xmlEscape(edge.source)}" target="${xmlEscape(edge.target)}">`);
|
|
399
422
|
for (const [key, value] of [
|
|
@@ -407,6 +430,23 @@ function renderGraphMl(graph) {
|
|
|
407
430
|
}
|
|
408
431
|
lines.push(" </edge>");
|
|
409
432
|
}
|
|
433
|
+
for (const hyperedge of [...graph.hyperedges ?? []].sort((left, right) => left.id.localeCompare(right.id))) {
|
|
434
|
+
for (const nodeId of hyperedge.nodeIds) {
|
|
435
|
+
lines.push(
|
|
436
|
+
` <edge id="${xmlEscape(`member:${hyperedge.id}:${nodeId}`)}" source="${xmlEscape(exportHyperedgeNodeId(hyperedge))}" target="${xmlEscape(nodeId)}">`
|
|
437
|
+
);
|
|
438
|
+
for (const [key, value] of [
|
|
439
|
+
["e_relation", "group_member"],
|
|
440
|
+
["e_status", "inferred"],
|
|
441
|
+
["e_evidence", hyperedge.evidenceClass],
|
|
442
|
+
["e_confidence", hyperedge.confidence],
|
|
443
|
+
["e_provenance", hyperedge.sourcePageIds]
|
|
444
|
+
]) {
|
|
445
|
+
lines.push(` <data key="${key}">${xmlEscape(graphMlData(value))}</data>`);
|
|
446
|
+
}
|
|
447
|
+
lines.push(" </edge>");
|
|
448
|
+
}
|
|
449
|
+
}
|
|
410
450
|
lines.push(" </graph>", "</graphml>", "");
|
|
411
451
|
return lines.join("\n");
|
|
412
452
|
}
|
|
@@ -433,13 +473,41 @@ function renderCypher(graph) {
|
|
|
433
473
|
lines.push(`MERGE (n:SwarmNode {id: '${cypherEscape(node.id)}'}) SET n += { ${props} };`);
|
|
434
474
|
}
|
|
435
475
|
lines.push("");
|
|
476
|
+
for (const hyperedge of [...graph.hyperedges ?? []].sort((left, right) => left.id.localeCompare(right.id))) {
|
|
477
|
+
const hyperedgeNodeId = exportHyperedgeNodeId(hyperedge);
|
|
478
|
+
lines.push(
|
|
479
|
+
`MERGE (h:SwarmNode {id: '${cypherEscape(hyperedgeNodeId)}'}) SET h += { id: '${cypherEscape(hyperedgeNodeId)}', label: '${cypherEscape(
|
|
480
|
+
hyperedge.label
|
|
481
|
+
)}', type: 'hyperedge', relation: '${cypherEscape(hyperedge.relation)}', evidenceClass: '${cypherEscape(
|
|
482
|
+
hyperedge.evidenceClass
|
|
483
|
+
)}', confidence: ${hyperedge.confidence}, sourcePageIds: '${cypherEscape(JSON.stringify(hyperedge.sourcePageIds))}', why: '${cypherEscape(
|
|
484
|
+
hyperedge.why
|
|
485
|
+
)}' };`
|
|
486
|
+
);
|
|
487
|
+
}
|
|
488
|
+
if ((graph.hyperedges ?? []).length) {
|
|
489
|
+
lines.push("");
|
|
490
|
+
}
|
|
491
|
+
for (const hyperedge of [...graph.hyperedges ?? []].sort((left, right) => left.id.localeCompare(right.id))) {
|
|
492
|
+
const hyperedgeNodeId = exportHyperedgeNodeId(hyperedge);
|
|
493
|
+
for (const nodeId of hyperedge.nodeIds) {
|
|
494
|
+
lines.push(
|
|
495
|
+
`MATCH (h:SwarmNode {id: '${cypherEscape(hyperedgeNodeId)}'}), (n:SwarmNode {id: '${cypherEscape(nodeId)}'})`,
|
|
496
|
+
`MERGE (h)-[r:GROUP_MEMBER {id: '${cypherEscape(`member:${hyperedge.id}:${nodeId}`)}'}]->(n)`,
|
|
497
|
+
`SET r += { relation: 'group_member', status: 'inferred', evidenceClass: '${cypherEscape(
|
|
498
|
+
hyperedge.evidenceClass
|
|
499
|
+
)}', confidence: ${hyperedge.confidence}, provenance: '${cypherEscape(JSON.stringify(hyperedge.sourcePageIds))}' };`
|
|
500
|
+
);
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
lines.push("");
|
|
436
504
|
for (const edge of [...graph.edges].sort((left, right) => left.id.localeCompare(right.id))) {
|
|
437
505
|
lines.push(
|
|
438
506
|
`MATCH (a:SwarmNode {id: '${cypherEscape(edge.source)}'}), (b:SwarmNode {id: '${cypherEscape(edge.target)}'})`,
|
|
439
507
|
`MERGE (a)-[r:${relationType(edge.relation)} {id: '${cypherEscape(edge.id)}'}]->(b)`,
|
|
440
508
|
`SET r += { relation: '${cypherEscape(edge.relation)}', status: '${cypherEscape(edge.status)}', evidenceClass: '${cypherEscape(
|
|
441
509
|
edge.evidenceClass
|
|
442
|
-
)}', confidence: ${edge.confidence}, provenance: '${cypherEscape(JSON.stringify(edge.provenance))}' };`
|
|
510
|
+
)}', confidence: ${edge.confidence}, provenance: '${cypherEscape(JSON.stringify(edge.provenance))}'${edge.similarityReasons?.length ? `, similarityReasons: '${cypherEscape(JSON.stringify(edge.similarityReasons))}'` : ""} };`
|
|
443
511
|
);
|
|
444
512
|
}
|
|
445
513
|
lines.push("");
|
|
@@ -596,6 +664,7 @@ async function uninstallGitHooks(rootDir) {
|
|
|
596
664
|
import fs9 from "fs/promises";
|
|
597
665
|
import path9 from "path";
|
|
598
666
|
import { Readability } from "@mozilla/readability";
|
|
667
|
+
import matter3 from "gray-matter";
|
|
599
668
|
import ignore from "ignore";
|
|
600
669
|
import { JSDOM } from "jsdom";
|
|
601
670
|
import mime from "mime-types";
|
|
@@ -3204,6 +3273,9 @@ async function markPagesStaleForSources(rootDir, sourceIds) {
|
|
|
3204
3273
|
var DEFAULT_MAX_ASSET_SIZE = 10 * 1024 * 1024;
|
|
3205
3274
|
var DEFAULT_MAX_DIRECTORY_FILES = 5e3;
|
|
3206
3275
|
var BUILT_IN_REPO_IGNORES = /* @__PURE__ */ new Set([".git", "node_modules", "dist", "build", ".next", "coverage", ".venv", "vendor", "target"]);
|
|
3276
|
+
function uniqueStrings(values) {
|
|
3277
|
+
return [...new Set(values.filter(Boolean))];
|
|
3278
|
+
}
|
|
3207
3279
|
function inferKind(mimeType, filePath) {
|
|
3208
3280
|
if (inferCodeLanguage(filePath, mimeType)) {
|
|
3209
3281
|
return "code";
|
|
@@ -3321,6 +3393,22 @@ function arxivIdFromInput(input) {
|
|
|
3321
3393
|
return null;
|
|
3322
3394
|
}
|
|
3323
3395
|
}
|
|
3396
|
+
function doiFromInput(input) {
|
|
3397
|
+
const trimmed = input.trim();
|
|
3398
|
+
if (/^10\.\S+\/\S+$/i.test(trimmed)) {
|
|
3399
|
+
return trimmed.replace(/\s+/g, "");
|
|
3400
|
+
}
|
|
3401
|
+
try {
|
|
3402
|
+
const url = new URL(trimmed);
|
|
3403
|
+
if (url.hostname === "doi.org" || url.hostname === "dx.doi.org") {
|
|
3404
|
+
const doi = decodeURIComponent(url.pathname.replace(/^\/+/, ""));
|
|
3405
|
+
return /^10\.\S+\/\S+$/i.test(doi) ? doi : null;
|
|
3406
|
+
}
|
|
3407
|
+
} catch {
|
|
3408
|
+
return null;
|
|
3409
|
+
}
|
|
3410
|
+
return null;
|
|
3411
|
+
}
|
|
3324
3412
|
function isTweetUrl(input) {
|
|
3325
3413
|
try {
|
|
3326
3414
|
const url = new URL(input);
|
|
@@ -3330,26 +3418,25 @@ function isTweetUrl(input) {
|
|
|
3330
3418
|
}
|
|
3331
3419
|
}
|
|
3332
3420
|
function markdownFrontmatter(value) {
|
|
3333
|
-
const
|
|
3334
|
-
|
|
3335
|
-
|
|
3336
|
-
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
}
|
|
3340
|
-
lines.push("---", "");
|
|
3341
|
-
return lines;
|
|
3421
|
+
const normalized = Object.fromEntries(
|
|
3422
|
+
Object.entries(value).filter(
|
|
3423
|
+
([, rawValue]) => Array.isArray(rawValue) ? rawValue.length > 0 : Boolean(typeof rawValue === "string" ? rawValue.trim() : rawValue)
|
|
3424
|
+
)
|
|
3425
|
+
);
|
|
3426
|
+
return matter3.stringify("", normalized).trimEnd().split("\n").concat([""]);
|
|
3342
3427
|
}
|
|
3343
3428
|
function prepareCapturedMarkdownInput(input) {
|
|
3344
3429
|
return {
|
|
3345
3430
|
title: input.title,
|
|
3346
3431
|
originType: "url",
|
|
3347
3432
|
sourceKind: "markdown",
|
|
3433
|
+
sourceType: input.sourceType,
|
|
3348
3434
|
url: normalizeOriginUrl(input.url),
|
|
3349
3435
|
mimeType: "text/markdown",
|
|
3350
3436
|
storedExtension: ".md",
|
|
3351
3437
|
payloadBytes: Buffer.from(input.markdown, "utf8"),
|
|
3352
3438
|
extractedText: input.markdown,
|
|
3439
|
+
attachments: input.attachments,
|
|
3353
3440
|
logDetails: input.logDetails
|
|
3354
3441
|
};
|
|
3355
3442
|
}
|
|
@@ -3360,6 +3447,17 @@ async function fetchText(url) {
|
|
|
3360
3447
|
}
|
|
3361
3448
|
return response.text();
|
|
3362
3449
|
}
|
|
3450
|
+
async function fetchResolvedText(url) {
|
|
3451
|
+
const response = await fetch(url);
|
|
3452
|
+
if (!response.ok) {
|
|
3453
|
+
throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
|
|
3454
|
+
}
|
|
3455
|
+
return {
|
|
3456
|
+
text: await response.text(),
|
|
3457
|
+
finalUrl: normalizeOriginUrl(response.url || url),
|
|
3458
|
+
contentType: response.headers.get("content-type")?.split(";")[0]?.trim() || "text/html"
|
|
3459
|
+
};
|
|
3460
|
+
}
|
|
3363
3461
|
function domTextFromHtml(html, baseUrl) {
|
|
3364
3462
|
const dom = new JSDOM(`<body>${html}</body>`, { url: baseUrl });
|
|
3365
3463
|
return normalizeWhitespace(dom.window.document.body.textContent ?? "");
|
|
@@ -3379,11 +3477,16 @@ async function captureArxivMarkdown(input, options) {
|
|
|
3379
3477
|
const authors = [...document.querySelectorAll('meta[name="citation_author"]')].map((node) => node.getAttribute("content")?.trim()).filter((value) => Boolean(value));
|
|
3380
3478
|
const authorsText = authors.join(", ") || stripLeadingLabel(document.querySelector(".authors")?.textContent?.trim() ?? "", "Authors:");
|
|
3381
3479
|
const abstract = stripLeadingLabel(document.querySelector("blockquote.abstract")?.textContent?.trim() ?? "", "Abstract:");
|
|
3480
|
+
const categories = [...document.querySelectorAll(".subheader .primary-subject, .metatable .tablecell.subjects")].flatMap((node) => (node.textContent ?? "").split(/;/g)).map((value) => value.trim()).filter(Boolean);
|
|
3382
3481
|
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
3383
3482
|
const markdown = [
|
|
3384
3483
|
...markdownFrontmatter({
|
|
3385
|
-
|
|
3484
|
+
source_type: "arxiv",
|
|
3386
3485
|
source_url: normalizedUrl,
|
|
3486
|
+
canonical_url: normalizedUrl,
|
|
3487
|
+
title,
|
|
3488
|
+
authors,
|
|
3489
|
+
tags: uniqueStrings(categories),
|
|
3387
3490
|
arxiv_id: arxivId,
|
|
3388
3491
|
author: options.author,
|
|
3389
3492
|
contributor: options.contributor,
|
|
@@ -3423,8 +3526,11 @@ async function captureTweetMarkdown(input, options) {
|
|
|
3423
3526
|
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
3424
3527
|
const markdown = [
|
|
3425
3528
|
...markdownFrontmatter({
|
|
3426
|
-
|
|
3529
|
+
source_type: "tweet",
|
|
3427
3530
|
source_url: normalizedUrl,
|
|
3531
|
+
canonical_url: canonicalUrl,
|
|
3532
|
+
title,
|
|
3533
|
+
authors: postAuthor ? [postAuthor] : void 0,
|
|
3428
3534
|
author: options.author,
|
|
3429
3535
|
contributor: options.contributor,
|
|
3430
3536
|
captured_at: capturedAt
|
|
@@ -3446,6 +3552,101 @@ async function captureTweetMarkdown(input, options) {
|
|
|
3446
3552
|
].join("\n");
|
|
3447
3553
|
return { title, normalizedUrl, markdown };
|
|
3448
3554
|
}
|
|
3555
|
+
function firstMetaContent(document, selectors) {
|
|
3556
|
+
for (const selector of selectors) {
|
|
3557
|
+
const value = document.querySelector(selector)?.getAttribute("content")?.trim();
|
|
3558
|
+
if (value) {
|
|
3559
|
+
return value;
|
|
3560
|
+
}
|
|
3561
|
+
}
|
|
3562
|
+
return void 0;
|
|
3563
|
+
}
|
|
3564
|
+
function metaContents(document, selectors) {
|
|
3565
|
+
return uniqueStrings(
|
|
3566
|
+
selectors.flatMap(
|
|
3567
|
+
(selector) => [...document.querySelectorAll(selector)].map((node) => node.getAttribute("content")?.trim() ?? "").filter(Boolean)
|
|
3568
|
+
)
|
|
3569
|
+
);
|
|
3570
|
+
}
|
|
3571
|
+
function splitKeywords(value) {
|
|
3572
|
+
return uniqueStrings(
|
|
3573
|
+
(value ?? "").split(/[;,]/g).map((item) => item.trim()).filter(Boolean)
|
|
3574
|
+
);
|
|
3575
|
+
}
|
|
3576
|
+
async function captureArticleMarkdown(rootDir, input, options, extra = { sourceType: "article" }) {
|
|
3577
|
+
const resolved = await fetchResolvedText(input);
|
|
3578
|
+
if (!resolved.contentType.includes("html")) {
|
|
3579
|
+
throw new Error(`Unsupported article content type: ${resolved.contentType}`);
|
|
3580
|
+
}
|
|
3581
|
+
const dom = new JSDOM(resolved.text, { url: resolved.finalUrl });
|
|
3582
|
+
const document = dom.window.document;
|
|
3583
|
+
const canonicalHref = document.querySelector('link[rel="canonical"]')?.getAttribute("href")?.trim();
|
|
3584
|
+
const canonicalUrl = canonicalHref ? normalizeOriginUrl(new URL(canonicalHref, resolved.finalUrl).toString()) : resolved.finalUrl;
|
|
3585
|
+
const title = firstMetaContent(document, ['meta[name="citation_title"]', 'meta[property="og:title"]', 'meta[name="twitter:title"]']) ?? (document.title.trim() || canonicalUrl);
|
|
3586
|
+
const authors = uniqueStrings([
|
|
3587
|
+
...metaContents(document, ['meta[name="citation_author"]']),
|
|
3588
|
+
...metaContents(document, ['meta[name="author"]', 'meta[property="article:author"]'])
|
|
3589
|
+
]);
|
|
3590
|
+
const publishedAt = firstMetaContent(document, [
|
|
3591
|
+
'meta[name="citation_publication_date"]',
|
|
3592
|
+
'meta[name="citation_online_date"]',
|
|
3593
|
+
'meta[property="article:published_time"]',
|
|
3594
|
+
'meta[name="pubdate"]'
|
|
3595
|
+
]);
|
|
3596
|
+
const updatedAt = firstMetaContent(document, ['meta[property="article:modified_time"]', 'meta[name="lastmod"]']);
|
|
3597
|
+
const tags = uniqueStrings([
|
|
3598
|
+
...metaContents(document, ['meta[property="article:tag"]']),
|
|
3599
|
+
...splitKeywords(firstMetaContent(document, ['meta[name="keywords"]']))
|
|
3600
|
+
]);
|
|
3601
|
+
const inferredDoi = extra.doi ?? firstMetaContent(document, ['meta[name="citation_doi"]', 'meta[name="dc.identifier"]'])?.replace(/^doi:\s*/i, "") ?? void 0;
|
|
3602
|
+
const normalizedOptions = normalizeIngestOptions(options);
|
|
3603
|
+
const prepared = await prepareUrlInput(rootDir, canonicalUrl, normalizedOptions);
|
|
3604
|
+
if (prepared.sourceKind !== "markdown" && prepared.sourceKind !== "text") {
|
|
3605
|
+
throw new Error(`Unsupported prepared article kind: ${prepared.sourceKind}`);
|
|
3606
|
+
}
|
|
3607
|
+
const body = prepared.extractedText ?? prepared.payloadBytes.toString("utf8");
|
|
3608
|
+
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
3609
|
+
const markdown = [
|
|
3610
|
+
...markdownFrontmatter({
|
|
3611
|
+
source_type: extra.sourceType,
|
|
3612
|
+
source_url: extra.sourceUrl ?? input,
|
|
3613
|
+
canonical_url: canonicalUrl,
|
|
3614
|
+
title,
|
|
3615
|
+
authors,
|
|
3616
|
+
published_at: publishedAt,
|
|
3617
|
+
updated_at: updatedAt,
|
|
3618
|
+
doi: inferredDoi,
|
|
3619
|
+
tags,
|
|
3620
|
+
author: options.author,
|
|
3621
|
+
contributor: options.contributor,
|
|
3622
|
+
captured_at: capturedAt
|
|
3623
|
+
}),
|
|
3624
|
+
body.trim(),
|
|
3625
|
+
"",
|
|
3626
|
+
"## Source",
|
|
3627
|
+
"",
|
|
3628
|
+
`- URL: ${canonicalUrl}`,
|
|
3629
|
+
...extra.sourceType === "doi" && inferredDoi ? [`- DOI: ${inferredDoi}`] : [],
|
|
3630
|
+
""
|
|
3631
|
+
].join("\n");
|
|
3632
|
+
return {
|
|
3633
|
+
title,
|
|
3634
|
+
normalizedUrl: canonicalUrl,
|
|
3635
|
+
markdown,
|
|
3636
|
+
attachments: prepared.attachments
|
|
3637
|
+
};
|
|
3638
|
+
}
|
|
3639
|
+
async function captureDoiMarkdown(rootDir, input, options) {
|
|
3640
|
+
const doi = doiFromInput(input);
|
|
3641
|
+
if (!doi) {
|
|
3642
|
+
throw new Error(`Could not determine a DOI from ${input}`);
|
|
3643
|
+
}
|
|
3644
|
+
return captureArticleMarkdown(rootDir, `https://doi.org/${encodeURIComponent(doi)}`, options, {
|
|
3645
|
+
sourceType: "doi",
|
|
3646
|
+
sourceUrl: input,
|
|
3647
|
+
doi
|
|
3648
|
+
});
|
|
3649
|
+
}
|
|
3449
3650
|
function manifestMatchesOrigin(manifest, prepared) {
|
|
3450
3651
|
if (prepared.originType === "url") {
|
|
3451
3652
|
return Boolean(prepared.url && manifest.url && normalizeOriginUrl(manifest.url) === normalizeOriginUrl(prepared.url));
|
|
@@ -3789,7 +3990,7 @@ async function persistPreparedInput(rootDir, prepared, paths) {
|
|
|
3789
3990
|
const extractionHash = prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact);
|
|
3790
3991
|
const existingByOrigin = await readManifestByOrigin(paths.manifestsDir, prepared);
|
|
3791
3992
|
const existingByHash = existingByOrigin ? null : await readManifestByHash(paths.manifestsDir, contentHash);
|
|
3792
|
-
if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
|
|
3993
|
+
if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.sourceType === prepared.sourceType && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
|
|
3793
3994
|
return { manifest: existingByOrigin, isNew: false, wasUpdated: false };
|
|
3794
3995
|
}
|
|
3795
3996
|
if (existingByHash) {
|
|
@@ -3835,6 +4036,7 @@ async function persistPreparedInput(rootDir, prepared, paths) {
|
|
|
3835
4036
|
title: prepared.title,
|
|
3836
4037
|
originType: prepared.originType,
|
|
3837
4038
|
sourceKind: prepared.sourceKind,
|
|
4039
|
+
sourceType: prepared.sourceType,
|
|
3838
4040
|
language: prepared.language,
|
|
3839
4041
|
originalPath: prepared.originalPath,
|
|
3840
4042
|
repoRelativePath: prepared.repoRelativePath,
|
|
@@ -3892,7 +4094,7 @@ function repoSyncWorkspaceIgnorePaths(rootDir, paths, repoRoot) {
|
|
|
3892
4094
|
return candidates.map((candidate) => path9.resolve(candidate)).filter((candidate, index, items) => items.indexOf(candidate) === index).filter((candidate) => withinRoot(repoRoot, candidate));
|
|
3893
4095
|
}
|
|
3894
4096
|
function preparedMatchesManifest(manifest, prepared, contentHash) {
|
|
3895
|
-
return manifest.contentHash === contentHash && manifest.extractionHash === (prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact)) && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
|
|
4097
|
+
return manifest.contentHash === contentHash && manifest.extractionHash === (prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact)) && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.sourceType === prepared.sourceType && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
|
|
3896
4098
|
}
|
|
3897
4099
|
function shouldDeferWatchSemanticRefresh(sourceKind) {
|
|
3898
4100
|
return sourceKind === "markdown" || sourceKind === "text" || sourceKind === "html" || sourceKind === "pdf" || sourceKind === "image";
|
|
@@ -4184,7 +4386,8 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4184
4386
|
if (!response.ok) {
|
|
4185
4387
|
throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
|
|
4186
4388
|
}
|
|
4187
|
-
const
|
|
4389
|
+
const finalUrl = normalizeOriginUrl(response.url || input);
|
|
4390
|
+
const inputUrl = new URL(finalUrl);
|
|
4188
4391
|
const originalPayloadBytes = Buffer.from(await response.arrayBuffer());
|
|
4189
4392
|
let payloadBytes = originalPayloadBytes;
|
|
4190
4393
|
let mimeType = resolveUrlMimeType(input, response);
|
|
@@ -4199,13 +4402,13 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4199
4402
|
const logDetails = [];
|
|
4200
4403
|
if (sourceKind === "html" || mimeType.startsWith("text/html")) {
|
|
4201
4404
|
const html = originalPayloadBytes.toString("utf8");
|
|
4202
|
-
const initialConversion = await convertHtmlToMarkdown(html,
|
|
4405
|
+
const initialConversion = await convertHtmlToMarkdown(html, finalUrl);
|
|
4203
4406
|
title = initialConversion.title;
|
|
4204
4407
|
let localizedHtml = html;
|
|
4205
4408
|
let localAssetReplacements;
|
|
4206
4409
|
if (options.includeAssets) {
|
|
4207
4410
|
const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
|
|
4208
|
-
extractHtmlImageReferences(html,
|
|
4411
|
+
extractHtmlImageReferences(html, finalUrl),
|
|
4209
4412
|
options
|
|
4210
4413
|
);
|
|
4211
4414
|
if (remoteAttachments.length) {
|
|
@@ -4215,19 +4418,19 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4215
4418
|
localAssetReplacements = new Map(
|
|
4216
4419
|
remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
|
|
4217
4420
|
);
|
|
4218
|
-
localizedHtml = rewriteHtmlImageReferences(html,
|
|
4421
|
+
localizedHtml = rewriteHtmlImageReferences(html, finalUrl, localAssetReplacements);
|
|
4219
4422
|
logDetails.push(`remote_assets=${remoteAttachments.length}`);
|
|
4220
4423
|
}
|
|
4221
4424
|
if (skippedCount) {
|
|
4222
4425
|
logDetails.push(`remote_asset_skips=${skippedCount}`);
|
|
4223
4426
|
}
|
|
4224
4427
|
}
|
|
4225
|
-
const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml,
|
|
4428
|
+
const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml, finalUrl);
|
|
4226
4429
|
extractedText = converted.markdown;
|
|
4227
4430
|
extractionArtifact = createHtmlReadabilityExtractionArtifact("markdown", "text/markdown");
|
|
4228
4431
|
if (localAssetReplacements?.size) {
|
|
4229
4432
|
const absoluteLocalAssetReplacements = new Map(
|
|
4230
|
-
[...localAssetReplacements.values()].map((replacement) => [new URL(replacement,
|
|
4433
|
+
[...localAssetReplacements.values()].map((replacement) => [new URL(replacement, finalUrl).toString(), replacement])
|
|
4231
4434
|
);
|
|
4232
4435
|
extractedText = rewriteMarkdownImageTargets(extractedText, absoluteLocalAssetReplacements);
|
|
4233
4436
|
}
|
|
@@ -4244,7 +4447,7 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4244
4447
|
extractionArtifact = createPlainTextExtractionArtifact(sourceKind, mimeType);
|
|
4245
4448
|
if (sourceKind === "markdown" && options.includeAssets) {
|
|
4246
4449
|
const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
|
|
4247
|
-
extractMarkdownImageReferences(extractedText,
|
|
4450
|
+
extractMarkdownImageReferences(extractedText, finalUrl),
|
|
4248
4451
|
options
|
|
4249
4452
|
);
|
|
4250
4453
|
if (remoteAttachments.length) {
|
|
@@ -4254,7 +4457,7 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4254
4457
|
const replacements = new Map(
|
|
4255
4458
|
remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
|
|
4256
4459
|
);
|
|
4257
|
-
extractedText = rewriteMarkdownImageReferences(extractedText,
|
|
4460
|
+
extractedText = rewriteMarkdownImageReferences(extractedText, finalUrl, replacements);
|
|
4258
4461
|
payloadBytes = Buffer.from(extractedText, "utf8");
|
|
4259
4462
|
logDetails.push(`remote_assets=${remoteAttachments.length}`);
|
|
4260
4463
|
}
|
|
@@ -4282,7 +4485,7 @@ async function prepareUrlInput(rootDir, input, options) {
|
|
|
4282
4485
|
originType: "url",
|
|
4283
4486
|
sourceKind,
|
|
4284
4487
|
language,
|
|
4285
|
-
url:
|
|
4488
|
+
url: finalUrl,
|
|
4286
4489
|
mimeType,
|
|
4287
4490
|
storedExtension,
|
|
4288
4491
|
payloadBytes,
|
|
@@ -4395,8 +4598,8 @@ async function ingestInput(rootDir, input, options) {
|
|
|
4395
4598
|
}
|
|
4396
4599
|
async function addInput(rootDir, input, options = {}) {
|
|
4397
4600
|
const { paths } = await initWorkspace(rootDir);
|
|
4398
|
-
if (!isHttpUrl(input) && !arxivIdFromInput(input)) {
|
|
4399
|
-
throw new Error("`swarmvault add` only supports URLs
|
|
4601
|
+
if (!isHttpUrl(input) && !arxivIdFromInput(input) && !doiFromInput(input)) {
|
|
4602
|
+
throw new Error("`swarmvault add` only supports URLs, bare arXiv ids, and bare DOI strings in the current release.");
|
|
4400
4603
|
}
|
|
4401
4604
|
let prepared = null;
|
|
4402
4605
|
let captureType = "url";
|
|
@@ -4409,26 +4612,55 @@ async function addInput(rootDir, input, options = {}) {
|
|
|
4409
4612
|
title: captured.title,
|
|
4410
4613
|
url: captured.normalizedUrl,
|
|
4411
4614
|
markdown: captured.markdown,
|
|
4615
|
+
sourceType: "arxiv",
|
|
4412
4616
|
logDetails: ["capture_type=arxiv"]
|
|
4413
4617
|
});
|
|
4414
4618
|
captureType = "arxiv";
|
|
4415
4619
|
normalizedUrl = captured.normalizedUrl;
|
|
4620
|
+
} else if (doiFromInput(input)) {
|
|
4621
|
+
const captured = await captureDoiMarkdown(rootDir, input, options);
|
|
4622
|
+
prepared = prepareCapturedMarkdownInput({
|
|
4623
|
+
title: captured.title,
|
|
4624
|
+
url: captured.normalizedUrl,
|
|
4625
|
+
markdown: captured.markdown,
|
|
4626
|
+
sourceType: "doi",
|
|
4627
|
+
attachments: captured.attachments,
|
|
4628
|
+
logDetails: ["capture_type=doi"]
|
|
4629
|
+
});
|
|
4630
|
+
captureType = "doi";
|
|
4631
|
+
normalizedUrl = captured.normalizedUrl;
|
|
4416
4632
|
} else if (isTweetUrl(input)) {
|
|
4417
4633
|
const captured = await captureTweetMarkdown(input, options);
|
|
4418
4634
|
prepared = prepareCapturedMarkdownInput({
|
|
4419
4635
|
title: captured.title,
|
|
4420
4636
|
url: captured.normalizedUrl,
|
|
4421
4637
|
markdown: captured.markdown,
|
|
4638
|
+
sourceType: "tweet",
|
|
4422
4639
|
logDetails: ["capture_type=tweet"]
|
|
4423
4640
|
});
|
|
4424
4641
|
captureType = "tweet";
|
|
4425
4642
|
normalizedUrl = captured.normalizedUrl;
|
|
4643
|
+
} else if (isHttpUrl(input)) {
|
|
4644
|
+
const captured = await captureArticleMarkdown(rootDir, input, options, {
|
|
4645
|
+
sourceType: "article",
|
|
4646
|
+
sourceUrl: input
|
|
4647
|
+
});
|
|
4648
|
+
prepared = prepareCapturedMarkdownInput({
|
|
4649
|
+
title: captured.title,
|
|
4650
|
+
url: captured.normalizedUrl,
|
|
4651
|
+
markdown: captured.markdown,
|
|
4652
|
+
sourceType: "article",
|
|
4653
|
+
attachments: captured.attachments,
|
|
4654
|
+
logDetails: ["capture_type=article"]
|
|
4655
|
+
});
|
|
4656
|
+
captureType = "article";
|
|
4657
|
+
normalizedUrl = captured.normalizedUrl;
|
|
4426
4658
|
}
|
|
4427
4659
|
} catch {
|
|
4428
4660
|
fallback = true;
|
|
4429
4661
|
}
|
|
4430
4662
|
if (!prepared) {
|
|
4431
|
-
normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : normalizeOriginUrl(input);
|
|
4663
|
+
normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : doiFromInput(input) ? `https://doi.org/${encodeURIComponent(doiFromInput(input) ?? "")}` : normalizeOriginUrl(input);
|
|
4432
4664
|
return {
|
|
4433
4665
|
captureType: "url",
|
|
4434
4666
|
manifest: await ingestInput(rootDir, normalizedUrl, options),
|
|
@@ -4684,7 +4916,7 @@ function buildSchemaPrompt(schema, instruction) {
|
|
|
4684
4916
|
// src/vault.ts
|
|
4685
4917
|
import fs15 from "fs/promises";
|
|
4686
4918
|
import path18 from "path";
|
|
4687
|
-
import
|
|
4919
|
+
import matter9 from "gray-matter";
|
|
4688
4920
|
import { z as z7 } from "zod";
|
|
4689
4921
|
|
|
4690
4922
|
// src/analysis.ts
|
|
@@ -4991,6 +5223,7 @@ var DEFAULT_BENCHMARK_QUESTIONS = [
|
|
|
4991
5223
|
"Where are the biggest knowledge gaps?",
|
|
4992
5224
|
"What evidence should I read first?"
|
|
4993
5225
|
];
|
|
5226
|
+
var RESEARCH_BENCHMARK_QUESTION = "Which research sources should I read first, and why?";
|
|
4994
5227
|
function nodeMap(graph) {
|
|
4995
5228
|
return new Map(graph.nodes.map((node) => [node.id, node]));
|
|
4996
5229
|
}
|
|
@@ -5040,9 +5273,68 @@ function benchmarkQueryTokens(graph, queryResult, pageContentsById) {
|
|
|
5040
5273
|
queryTokens,
|
|
5041
5274
|
reduction: 0,
|
|
5042
5275
|
visitedNodeIds: queryResult.visitedNodeIds,
|
|
5276
|
+
visitedEdgeIds: queryResult.visitedEdgeIds,
|
|
5043
5277
|
pageIds: queryResult.pageIds
|
|
5044
5278
|
};
|
|
5045
5279
|
}
|
|
5280
|
+
/**
 * Computes a fingerprint of the graph from a normalized projection of its
 * nodes, edges, pages and communities.
 *
 * Every list is sorted (records by id, id lists by default string order) so
 * the result does not depend on the order in which the graph was assembled.
 * Pages of kind "graph_report" and "community_summary" are excluded.
 *
 * @param {object} graph - Graph with `nodes`, `edges`, `pages` and optional `communities`.
 * @returns {string} The `sha256` digest of the serialized projection.
 */
function graphHash(graph) {
  const compareIds = (left, right) => left.id.localeCompare(right.id);
  const sortedCopy = (values) => [...values].sort();

  const hashedPages = graph.pages.filter(
    (page) => !(page.kind === "graph_report" || page.kind === "community_summary")
  );

  const nodes = [...graph.nodes].map((node) => ({
    id: node.id,
    type: node.type,
    label: node.label,
    pageId: node.pageId ?? null,
    communityId: node.communityId ?? null,
    degree: node.degree ?? null,
    bridgeScore: node.bridgeScore ?? null,
    isGodNode: node.isGodNode ?? false,
    sourceIds: sortedCopy(node.sourceIds),
    projectIds: sortedCopy(node.projectIds)
  }));
  nodes.sort(compareIds);

  const edges = [...graph.edges].map((edge) => ({
    id: edge.id,
    source: edge.source,
    target: edge.target,
    relation: edge.relation,
    status: edge.status,
    evidenceClass: edge.evidenceClass,
    confidence: edge.confidence,
    provenance: sortedCopy(edge.provenance)
  }));
  edges.sort(compareIds);

  const pages = [...hashedPages].map((page) => ({
    id: page.id,
    path: page.path,
    kind: page.kind,
    status: page.status,
    sourceType: page.sourceType ?? null,
    sourceIds: sortedCopy(page.sourceIds),
    projectIds: sortedCopy(page.projectIds),
    nodeIds: sortedCopy(page.nodeIds)
  }));
  pages.sort(compareIds);

  const communities = [...graph.communities ?? []].map((community) => ({
    id: community.id,
    label: community.label,
    nodeIds: sortedCopy(community.nodeIds)
  }));
  communities.sort(compareIds);

  // Key order below is part of the serialized form and therefore of the hash.
  return sha256(JSON.stringify({ nodes, edges, pages, communities }));
}
|
|
5327
|
+
/**
 * Reports whether any page is a source page with a source type other than "url".
 *
 * @param {Array<{ kind: string, sourceType?: string }>} pages - Pages to inspect.
 * @returns {boolean} True when at least one such page exists.
 */
function hasResearchSources(pages) {
  const isResearchSource = (page) => {
    if (page.kind !== "source") {
      return false;
    }
    if (!page.sourceType) {
      return false;
    }
    return page.sourceType !== "url";
  };
  return pages.some(isResearchSource);
}
|
|
5330
|
+
/**
 * Picks the default benchmark questions for a graph.
 *
 * When the graph has research sources, the research question is placed ahead
 * of the standard defaults. The result is de-duplicated and truncated.
 *
 * @param {{ pages: Array<object> }} graph - Graph whose pages decide whether the research question applies.
 * @param {number} [maxQuestions=3] - Requested number of questions; clamped to
 *   the range 1..DEFAULT_BENCHMARK_QUESTIONS.length.
 * @returns {string[]} The selected questions.
 */
function defaultBenchmarkQuestionsForGraph(graph, maxQuestions = 3) {
  const cap = Math.max(1, Math.min(maxQuestions, DEFAULT_BENCHMARK_QUESTIONS.length));
  const candidates = hasResearchSources(graph.pages)
    ? [RESEARCH_BENCHMARK_QUESTION, ...DEFAULT_BENCHMARK_QUESTIONS]
    : [...DEFAULT_BENCHMARK_QUESTIONS];
  const distinct = uniqueBy(candidates, (question) => question);
  return distinct.slice(0, cap);
}
|
|
5046
5338
|
function buildBenchmarkArtifact(input) {
|
|
5047
5339
|
const corpusTokens = Math.max(1, Math.round(input.corpusWords * (100 / 75)));
|
|
5048
5340
|
const perQuestion = input.perQuestion.filter((entry) => entry.queryTokens > 0).map((entry) => ({
|
|
@@ -5051,8 +5343,18 @@ function buildBenchmarkArtifact(input) {
|
|
|
5051
5343
|
}));
|
|
5052
5344
|
const avgQueryTokens = perQuestion.length ? Math.max(1, Math.round(perQuestion.reduce((total, entry) => total + entry.queryTokens, 0) / perQuestion.length)) : 0;
|
|
5053
5345
|
const reductionRatio = avgQueryTokens ? Number(Math.max(0, 1 - avgQueryTokens / Math.max(1, corpusTokens)).toFixed(3)) : 0;
|
|
5346
|
+
const uniqueVisitedNodes = new Set(perQuestion.flatMap((entry) => entry.visitedNodeIds)).size;
|
|
5347
|
+
const summary = {
|
|
5348
|
+
questionCount: input.questions.length,
|
|
5349
|
+
uniqueVisitedNodes,
|
|
5350
|
+
finalContextTokens: avgQueryTokens,
|
|
5351
|
+
naiveCorpusTokens: corpusTokens,
|
|
5352
|
+
avgReduction: reductionRatio,
|
|
5353
|
+
reductionRatio
|
|
5354
|
+
};
|
|
5054
5355
|
return {
|
|
5055
5356
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5357
|
+
graphHash: graphHash(input.graph),
|
|
5056
5358
|
corpusWords: input.corpusWords,
|
|
5057
5359
|
corpusTokens,
|
|
5058
5360
|
nodes: input.graph.nodes.length,
|
|
@@ -5060,7 +5362,9 @@ function buildBenchmarkArtifact(input) {
|
|
|
5060
5362
|
avgQueryTokens,
|
|
5061
5363
|
reductionRatio,
|
|
5062
5364
|
sampleQuestions: input.questions,
|
|
5063
|
-
perQuestion
|
|
5365
|
+
perQuestion,
|
|
5366
|
+
questionResults: perQuestion,
|
|
5367
|
+
summary
|
|
5064
5368
|
};
|
|
5065
5369
|
}
|
|
5066
5370
|
|
|
@@ -5083,7 +5387,7 @@ function conflictConfidence(claimA, claimB) {
|
|
|
5083
5387
|
// src/deep-lint.ts
|
|
5084
5388
|
import fs11 from "fs/promises";
|
|
5085
5389
|
import path14 from "path";
|
|
5086
|
-
import
|
|
5390
|
+
import matter4 from "gray-matter";
|
|
5087
5391
|
import { z as z5 } from "zod";
|
|
5088
5392
|
|
|
5089
5393
|
// src/findings.ts
|
|
@@ -5450,7 +5754,7 @@ async function loadContextPages(rootDir, graph) {
|
|
|
5450
5754
|
contextPages.slice(0, 18).map(async (page) => {
|
|
5451
5755
|
const absolutePath = path14.join(paths.wikiDir, page.path);
|
|
5452
5756
|
const raw = await fs11.readFile(absolutePath, "utf8").catch(() => "");
|
|
5453
|
-
const parsed =
|
|
5757
|
+
const parsed = matter4(raw);
|
|
5454
5758
|
return {
|
|
5455
5759
|
id: page.id,
|
|
5456
5760
|
title: page.title,
|
|
@@ -5674,6 +5978,331 @@ async function runDeepLint(rootDir, structuralFindings, options = {}) {
|
|
|
5674
5978
|
);
|
|
5675
5979
|
}
|
|
5676
5980
|
|
|
5981
|
+
// src/graph-enrichment.ts
|
|
5982
|
+
// Words that themeTokens discards when extracting theme tokens.
var STOPWORDS2 = /* @__PURE__ */ new Set([
  "about", "after", "also", "among", "and", "around", "because", "been",
  "being", "between", "both", "does", "from", "into", "just", "like",
  "many", "more", "most", "much", "note", "only", "other", "over",
  "same", "such", "than", "that", "their", "them", "there", "these",
  "this", "through", "under", "very", "what", "when", "where", "which",
  "while", "with", "would", "your"
]);
|
|
6028
|
+
/**
 * Passes a value through `normalizeWhitespace` and lower-cases the result.
 *
 * @param {string} value - Raw text.
 * @returns {string} The lower-cased, whitespace-normalized text.
 */
function normalizeValue(value) {
  const collapsed = normalizeWhitespace(value);
  return collapsed.toLowerCase();
}
|
|
6031
|
+
/**
 * Records a normalized feature value under a reason in the given bucket.
 *
 * Falsy values, and values that normalize to an empty string, are ignored.
 *
 * @param {Map<string, Set<string>>} bucket - Feature values grouped by reason; mutated in place.
 * @param {string} reason - Reason key such as "shared_concept".
 * @param {string | undefined | null} value - Raw feature value.
 * @returns {void}
 */
function addFeature(bucket, reason, value) {
  const normalized = value ? normalizeValue(value) : "";
  if (!normalized) {
    return;
  }
  if (bucket.has(reason)) {
    bucket.get(reason)?.add(normalized);
    return;
  }
  bucket.set(reason, /* @__PURE__ */ new Set([normalized]));
}
|
|
6044
|
+
/**
 * Extracts up to six distinct theme tokens from a piece of text.
 *
 * The normalized text is split on runs of non-alphanumeric characters; tokens
 * shorter than four characters and stopwords are dropped.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} At most six distinct tokens, in order of first appearance.
 */
function themeTokens(value) {
  const words = normalizeValue(value).split(/[^a-z0-9]+/i);
  const meaningful = words.filter((word) => !(word.length < 4 || STOPWORDS2.has(word)));
  const distinct = uniqueBy(meaningful, (word) => word);
  return distinct.slice(0, 6);
}
|
|
6050
|
+
/**
 * Builds an order-independent key for a pair of ids.
 *
 * @param {string} left - First id.
 * @param {string} right - Second id.
 * @returns {string} Both ids, ordered with `localeCompare`, joined by "|".
 */
function pairKey(left, right) {
  const byLocale = (a, b) => a.localeCompare(b);
  const ordered = [left, right];
  ordered.sort(byLocale);
  return ordered.join("|");
}
|
|
6053
|
+
/**
 * Decides whether two nodes come from different scopes.
 *
 * They do when both have a page id and the ids differ, or when their source id
 * sets are not identical.
 *
 * @param {{ pageId?: string, sourceIds: string[] }} left - First node.
 * @param {{ pageId?: string, sourceIds: string[] }} right - Second node.
 * @returns {boolean} True when the nodes differ in page or in source ids.
 */
function hasDistinctScope(left, right) {
  const onDifferentPages = Boolean(left.pageId) && Boolean(right.pageId) && left.pageId !== right.pageId;
  if (onDifferentPages) {
    return true;
  }
  const leftIds = new Set(left.sourceIds);
  const rightIds = new Set(right.sourceIds);
  for (const sourceId of leftIds) {
    if (!rightIds.has(sourceId)) {
      return true;
    }
  }
  for (const sourceId of rightIds) {
    if (!leftIds.has(sourceId)) {
      return true;
    }
  }
  return false;
}
|
|
6063
|
+
/**
 * Returns the size of a collection, treating a missing collection as empty.
 *
 * @param {{ size?: number } | null | undefined} values - Collection of supporting values.
 * @returns {number} `values.size`, or 0 when the collection or its size is absent.
 */
function supportCount(values) {
  if (values === null || values === void 0) {
    return 0;
  }
  return values.size ?? 0;
}
|
|
6066
|
+
/**
 * Scores how strongly two nodes resemble each other from their shared features.
 *
 * Each reason present contributes a base weight plus a capped increment per
 * additional shared value ("shared_source_type" contributes a flat 0.1). A
 * bonus is added when two or more reason categories are present, and the total
 * is capped at 0.96.
 *
 * @param {Map<string, Set<string>>} reasons - Shared values grouped by reason.
 * @returns {number} Similarity score, at most 0.96.
 */
function similarityScore(reasons) {
  const weigh = (reason, base, step, cap) => {
    const count = supportCount(reasons.get(reason));
    return count ? base + Math.min(cap, (count - 1) * step) : 0;
  };
  const sourceTypeWeight = supportCount(reasons.get("shared_source_type")) ? 0.1 : 0;

  // Terms are added in a fixed order so floating-point results stay stable.
  const weighted = weigh("shared_concept", 0.46, 0.04, 0.12)
    + weigh("shared_entity", 0.34, 0.03, 0.1)
    + weigh("shared_symbol", 0.24, 0.02, 0.08)
    + weigh("shared_rationale_theme", 0.18, 0.03, 0.08)
    + sourceTypeWeight
    + weigh("shared_tag", 0.12, 0.02, 0.04);

  const categoryCount = reasons.size;
  let categoryBonus = 0;
  if (categoryCount >= 3) {
    categoryBonus = 0.08;
  } else if (categoryCount === 2) {
    categoryBonus = 0.04;
  }
  return Math.min(0.96, weighted + categoryBonus);
}
|
|
6078
|
+
/**
 * Renders a one-sentence explanation for an inferred similarity link.
 *
 * @param {string[] | undefined} reasons - Reason keys attached to the link.
 * @returns {string} A sentence listing the reasons, or a generic sentence when
 *   none are given. Unrecognized reason keys are rendered as "shared tags".
 */
function describeSimilarityReasons(reasons) {
  if (!reasons?.length) {
    return "This link is inferred from multiple shared graph features.";
  }
  const labelByReason = new Map([
    ["shared_concept", "shared concepts"],
    ["shared_entity", "shared entities"],
    ["shared_symbol", "shared symbols"],
    ["shared_rationale_theme", "shared rationale themes"],
    ["shared_source_type", "shared source type"]
  ]);
  const labels = reasons.map((reason) => labelByReason.get(reason) ?? "shared tags");
  return `This link is inferred from ${labels.join(", ")}.`;
}
|
|
6087
|
+
/**
 * Collects comparable features for every node that can take part in
 * similarity matching.
 *
 * Symbol, concept and entity nodes are skipped. Source and module nodes gather
 * concepts, entities, source type, up to twelve symbols (exported ones when
 * any exist) and rationale theme tokens from each analysed source. Rationale
 * nodes gather source type and the theme tokens of their own rationale text,
 * falling back to the node label. Nodes that end up with no features are
 * dropped.
 *
 * @param {Array<object>} nodes - Graph nodes.
 * @param {Array<object>} manifests - Source manifests, looked up by `sourceId`.
 * @param {Array<object>} analyses - Source analyses, looked up by `sourceId`.
 * @returns {Array<{ node: object, featureValues: Map<string, Set<string>> }>}
 */
function nodeContexts(nodes, manifests, analyses) {
  const manifestLookup = new Map(manifests.map((manifest) => [manifest.sourceId, manifest]));
  const analysisLookup = new Map(analyses.map((analysis) => [analysis.sourceId, analysis]));
  const skippedTypes = ["symbol", "concept", "entity"];

  const addSourceType = (features, sourceId) => {
    const manifest = manifestLookup.get(sourceId);
    if (manifest?.sourceType) {
      addFeature(features, "shared_source_type", manifest.sourceType);
    }
  };

  // Features of a source/module node, drawn from one of its analysed sources.
  const collectDocumentFeatures = (features, sourceId) => {
    const analysis = analysisLookup.get(sourceId);
    if (!analysis) {
      return;
    }
    for (const concept of analysis.concepts) {
      addFeature(features, "shared_concept", concept.name);
    }
    for (const entity of analysis.entities) {
      addFeature(features, "shared_entity", entity.name);
    }
    addSourceType(features, sourceId);
    if (analysis.code) {
      const exported = analysis.code.symbols.filter((symbol) => symbol.exported);
      const candidates = exported.length ? exported : analysis.code.symbols;
      for (const symbol of candidates.slice(0, 12)) {
        addFeature(features, "shared_symbol", symbol.name);
      }
    }
    for (const rationale of analysis.rationales) {
      for (const token of themeTokens(rationale.text)) {
        addFeature(features, "shared_rationale_theme", token);
      }
    }
  };

  // Features of a rationale node, drawn from one of its sources.
  const collectRationaleFeatures = (features, node, sourceId) => {
    const analysis = analysisLookup.get(sourceId);
    addSourceType(features, sourceId);
    const rationale = analysis?.rationales.find((item) => item.id === node.id);
    for (const token of themeTokens(rationale?.text ?? node.label)) {
      addFeature(features, "shared_rationale_theme", token);
    }
  };

  return nodes.flatMap((node) => {
    if (skippedTypes.includes(node.type)) {
      return [];
    }
    const features = /* @__PURE__ */ new Map();
    if (node.type === "source" || node.type === "module") {
      for (const sourceId of node.sourceIds) {
        collectDocumentFeatures(features, sourceId);
      }
    } else if (node.type === "rationale") {
      for (const sourceId of node.sourceIds) {
        collectRationaleFeatures(features, node, sourceId);
      }
    }
    return features.size > 0 ? [{ node, featureValues: features }] : [];
  });
}
|
|
6136
|
+
/**
 * Infers "semantically_similar_to" edges between nodes that share features.
 *
 * Nodes of the same type are bucketed per reason and feature value. Every pair
 * inside a bucket that has distinct scope and no existing edge accumulates the
 * shared values per reason. Pairs scoring below 0.5 are discarded.
 *
 * The pair's nodes and the bucket's feature value are carried as data next to
 * their string keys rather than being re-parsed out of the keys, so ids or
 * values containing "|" or ":" cannot corrupt the result.
 *
 * @param {Array<object>} nodes - Graph nodes.
 * @param {Array<{ source: string, target: string }>} edges - Existing edges; pairs already linked are skipped.
 * @param {Array<object>} manifests - Source manifests.
 * @param {Array<object>} analyses - Source analyses.
 * @returns {Array<object>} Inferred edges, sorted by descending confidence and then by id.
 */
function buildSemanticSimilarityEdges(nodes, edges, manifests, analyses) {
  const contexts = nodeContexts(nodes, manifests, analyses);
  const contextsById = new Map(contexts.map((context) => [context.node.id, context]));
  const directPairs = new Set(edges.map((edge) => pairKey(edge.source, edge.target)));
  // pair key -> { left, right, reasons: Map<reason, Set<value>> }
  const pairs = /* @__PURE__ */ new Map();
  for (const reason of ["shared_concept", "shared_entity", "shared_symbol", "shared_rationale_theme", "shared_source_type"]) {
    // bucket id -> { value, nodeIds }
    const buckets = /* @__PURE__ */ new Map();
    for (const context of contexts) {
      for (const value of context.featureValues.get(reason) ?? []) {
        const bucketId = `${context.node.type}:${reason}:${value}`;
        let bucket = buckets.get(bucketId);
        if (!bucket) {
          bucket = { value, nodeIds: [] };
          buckets.set(bucketId, bucket);
        }
        bucket.nodeIds.push(context.node.id);
      }
    }
    for (const { value, nodeIds } of buckets.values()) {
      if (nodeIds.length < 2) {
        continue;
      }
      const uniqueNodeIds = uniqueBy(nodeIds, (nodeId) => nodeId).sort((left, right) => left.localeCompare(right));
      for (let index = 0; index < uniqueNodeIds.length; index++) {
        const left = contextsById.get(uniqueNodeIds[index]);
        if (!left) {
          continue;
        }
        for (let cursor = index + 1; cursor < uniqueNodeIds.length; cursor++) {
          const right = contextsById.get(uniqueNodeIds[cursor]);
          if (!right || !hasDistinctScope(left.node, right.node)) {
            continue;
          }
          const key = pairKey(left.node.id, right.node.id);
          if (directPairs.has(key)) {
            continue;
          }
          let pair = pairs.get(key);
          if (!pair) {
            pair = { left: left.node, right: right.node, reasons: /* @__PURE__ */ new Map() };
            pairs.set(key, pair);
          }
          let sharedValues = pair.reasons.get(reason);
          if (!sharedValues) {
            sharedValues = /* @__PURE__ */ new Set();
            pair.reasons.set(reason, sharedValues);
          }
          sharedValues.add(value);
        }
      }
    }
  }
  return [...pairs.values()].flatMap(({ left, right, reasons }) => {
    const confidence = similarityScore(reasons);
    if (confidence < 0.5) {
      return [];
    }
    return [
      {
        // The id is derived from both endpoints and the sorted reason keys.
        id: `similar:${sha256(`${left.id}|${right.id}|${[...reasons.keys()].sort().join(",")}`).slice(0, 16)}`,
        source: left.id,
        target: right.id,
        relation: "semantically_similar_to",
        status: "inferred",
        evidenceClass: "inferred",
        confidence,
        provenance: uniqueBy(
          [...left.sourceIds, ...right.sourceIds].sort((a, b) => a.localeCompare(b)),
          (value) => value
        ),
        similarityReasons: [...reasons.keys()].sort((a, b) => a.localeCompare(b))
      }
    ];
  }).sort((left, right) => right.confidence - left.confidence || left.id.localeCompare(right.id));
}
|
|
6212
|
+
/**
 * Builds "participate_in" hyperedges for concepts and entities that are
 * mentioned by at least three distinct source nodes.
 *
 * Only extracted "mentions" edges running from a source node to a concept or
 * entity node are counted.
 *
 * @param {{ nodes: Array<object>, edges: Array<object> }} graph - Graph to scan.
 * @returns {Array<object>} One hyperedge per qualifying concept or entity.
 */
function buildTopicHyperedges(graph) {
  const nodeLookup = new Map(graph.nodes.map((node) => [node.id, node]));
  const isTopic = (node) => node?.type === "concept" || node?.type === "entity";

  // topic node id -> ids of the source nodes that mention it
  const sourcesByTopic = /* @__PURE__ */ new Map();
  for (const edge of graph.edges) {
    if (edge.relation !== "mentions" || edge.evidenceClass !== "extracted") {
      continue;
    }
    const mentioner = nodeLookup.get(edge.source);
    const topic = nodeLookup.get(edge.target);
    if (mentioner?.type !== "source" || !isTopic(topic)) {
      continue;
    }
    const known = sourcesByTopic.get(topic.id);
    if (known) {
      known.push(mentioner.id);
    } else {
      sourcesByTopic.set(topic.id, [mentioner.id]);
    }
  }

  const hyperedges = [];
  for (const [anchorId, members] of sourcesByTopic) {
    const anchor = nodeLookup.get(anchorId);
    const uniqueMembers = uniqueBy(members, (member) => member).sort((left, right) => left.localeCompare(right));
    if (!anchor || uniqueMembers.length < 3) {
      continue;
    }
    const nodeIds = [anchor.id, ...uniqueMembers];
    const pageIds = nodeIds.map((nodeId) => nodeLookup.get(nodeId)?.pageId ?? "").filter(Boolean);
    hyperedges.push({
      id: `hyper:${sha256(`participate_in|${anchor.id}|${uniqueMembers.join("|")}`).slice(0, 16)}`,
      label: anchor.label,
      relation: "participate_in",
      nodeIds,
      evidenceClass: "extracted",
      confidence: Math.min(0.96, 0.72 + uniqueMembers.length * 0.06),
      sourcePageIds: uniqueBy(pageIds, (value) => value),
      why: `${uniqueMembers.length} source nodes converge on ${anchor.label} through extracted mention edges.`
    });
  }
  return hyperedges;
}
|
|
6251
|
+
/**
 * Builds "form" hyperedges for modules that define at least three distinct
 * symbols.
 *
 * Only extracted "defines" edges running from a module node to a symbol node
 * are counted.
 *
 * @param {{ nodes: Array<object>, edges: Array<object> }} graph - Graph to scan.
 * @returns {Array<object>} One hyperedge per qualifying module.
 */
function buildModuleFormHyperedges(graph) {
  const nodeLookup = new Map(graph.nodes.map((node) => [node.id, node]));

  // module node id -> ids of the symbol nodes it defines
  const symbolsByModule = /* @__PURE__ */ new Map();
  for (const edge of graph.edges) {
    if (edge.relation !== "defines" || edge.evidenceClass !== "extracted") {
      continue;
    }
    const definer = nodeLookup.get(edge.source);
    const defined = nodeLookup.get(edge.target);
    if (definer?.type !== "module" || defined?.type !== "symbol") {
      continue;
    }
    const known = symbolsByModule.get(definer.id);
    if (known) {
      known.push(defined.id);
    } else {
      symbolsByModule.set(definer.id, [defined.id]);
    }
  }

  const hyperedges = [];
  for (const [moduleId, members] of symbolsByModule) {
    const moduleNode = nodeLookup.get(moduleId);
    const uniqueMembers = uniqueBy(members, (member) => member).sort((left, right) => left.localeCompare(right));
    if (!moduleNode || uniqueMembers.length < 3) {
      continue;
    }
    const nodeIds = [moduleNode.id, ...uniqueMembers];
    const pageIds = nodeIds.map((nodeId) => nodeLookup.get(nodeId)?.pageId ?? "").filter(Boolean);
    hyperedges.push({
      id: `hyper:${sha256(`form|${moduleNode.id}|${uniqueMembers.join("|")}`).slice(0, 16)}`,
      label: `${moduleNode.label} API`,
      relation: "form",
      nodeIds,
      evidenceClass: "extracted",
      confidence: Math.min(0.98, 0.78 + uniqueMembers.length * 0.04),
      sourcePageIds: uniqueBy(pageIds, (value) => value),
      why: `${moduleNode.label} and ${uniqueMembers.length} defined symbols form one local module surface.`
    });
  }
  return hyperedges;
}
|
|
6290
|
+
/**
 * Adds inferred similarity edges and group hyperedges to a graph.
 *
 * @param {{ nodes: Array<object>, edges: Array<object> }} graph - Base graph; not mutated.
 * @param {Array<object>} manifests - Source manifests.
 * @param {Array<object>} analyses - Source analyses.
 * @returns {{ edges: Array<object>, hyperedges: Array<object> }} All edges
 *   sorted by id, plus de-duplicated hyperedges ordered by descending
 *   confidence and then by label.
 */
function enrichGraph(graph, manifests, analyses) {
  const inferredEdges = buildSemanticSimilarityEdges(graph.nodes, graph.edges, manifests, analyses);
  const edges = [...graph.edges, ...inferredEdges];
  edges.sort((left, right) => left.id.localeCompare(right.id));

  // Both hyperedge builders read the same enriched view of the graph.
  const enrichedView = { ...graph, edges, hyperedges: [] };
  const candidates = [
    ...buildTopicHyperedges(enrichedView),
    ...buildModuleFormHyperedges(enrichedView)
  ];
  candidates.sort((left, right) => right.confidence - left.confidence || left.label.localeCompare(right.label));

  return {
    edges,
    hyperedges: uniqueBy(candidates, (hyperedge) => hyperedge.id)
  };
}
|
|
6305
|
+
|
|
5677
6306
|
// src/graph-tools.ts
|
|
5678
6307
|
function normalizeTarget(value) {
|
|
5679
6308
|
return normalizeWhitespace(value).toLowerCase();
|
|
@@ -5684,6 +6313,9 @@ function nodeById(graph) {
|
|
|
5684
6313
|
/**
 * Indexes the graph's pages by their id.
 *
 * @param {{ pages: Array<{ id: string }> }} graph - Graph whose pages are indexed.
 * @returns {Map<string, object>} Map from page id to page; a later page with a
 *   duplicate id replaces the earlier one.
 */
function pageById(graph) {
  const byId = new Map();
  graph.pages.forEach((page) => {
    byId.set(page.id, page);
  });
  return byId;
}
|
|
6316
|
+
/**
 * Lists the hyperedges that include a given node.
 *
 * @param {{ hyperedges?: Array<object> }} graph - Graph; a missing `hyperedges` list is treated as empty.
 * @param {string} nodeId - Id of the node to look for.
 * @returns {Array<object>} Matching hyperedges ordered by descending confidence, then by label.
 */
function hyperedgesForNode(graph, nodeId) {
  const containing = (graph.hyperedges ?? []).filter(({ nodeIds }) => nodeIds.includes(nodeId));
  containing.sort((left, right) => {
    const byConfidence = right.confidence - left.confidence;
    return byConfidence || left.label.localeCompare(right.label);
  });
  return containing;
}
|
|
5687
6319
|
function scoreMatch(query, candidate) {
|
|
5688
6320
|
const normalizedQuery = normalizeTarget(query);
|
|
5689
6321
|
const normalizedCandidate = normalizeTarget(candidate);
|
|
@@ -5732,6 +6364,14 @@ function nodeMatches(graph, query) {
|
|
|
5732
6364
|
score: Math.max(scoreMatch(query, node.label), scoreMatch(query, node.id))
|
|
5733
6365
|
})).filter((match) => match.score > 0).sort((left, right) => right.score - left.score || left.label.localeCompare(right.label));
|
|
5734
6366
|
}
|
|
6367
|
+
/**
 * Scores every hyperedge against a query and returns the ones that match.
 *
 * A hyperedge's score is the best `scoreMatch` result over its label, its
 * `why` text and its relation.
 *
 * @param {{ hyperedges?: Array<object> }} graph - Graph; a missing `hyperedges` list is treated as empty.
 * @param {string} query - Query text.
 * @returns {Array<{ type: "hyperedge", id: string, label: string, score: number }>}
 *   Matches with a positive score, ordered by descending score, then by label.
 */
function hyperedgeMatches(graph, query) {
  const byScoreThenLabel = (left, right) => right.score - left.score || left.label.localeCompare(right.label);
  return (graph.hyperedges ?? []).flatMap((hyperedge) => {
    const score = Math.max(
      scoreMatch(query, hyperedge.label),
      scoreMatch(query, hyperedge.why),
      scoreMatch(query, hyperedge.relation)
    );
    if (!(score > 0)) {
      return [];
    }
    return [{ type: "hyperedge", id: hyperedge.id, label: hyperedge.label, score }];
  }).sort(byScoreThenLabel);
}
|
|
5735
6375
|
function graphAdjacency(graph) {
|
|
5736
6376
|
const adjacency = /* @__PURE__ */ new Map();
|
|
5737
6377
|
const push = (nodeId, item) => {
|
|
@@ -5780,14 +6420,15 @@ function queryGraph(graph, question, searchResults, options) {
|
|
|
5780
6420
|
const traversal = options?.traversal ?? "bfs";
|
|
5781
6421
|
const budget = Math.max(3, Math.min(options?.budget ?? 12, 50));
|
|
5782
6422
|
const matches = uniqueBy(
|
|
5783
|
-
[...pageSearchMatches(graph, question, searchResults), ...nodeMatches(graph, question)],
|
|
6423
|
+
[...pageSearchMatches(graph, question, searchResults), ...nodeMatches(graph, question), ...hyperedgeMatches(graph, question)],
|
|
5784
6424
|
(match) => `${match.type}:${match.id}`
|
|
5785
6425
|
).sort((left, right) => right.score - left.score || left.label.localeCompare(right.label)).slice(0, 12);
|
|
5786
6426
|
const pages = pageById(graph);
|
|
5787
6427
|
const seeds = uniqueBy(
|
|
5788
6428
|
[
|
|
5789
6429
|
...searchResults.flatMap((result) => pages.get(result.pageId)?.nodeIds ?? []),
|
|
5790
|
-
...matches.filter((match) => match.type === "node").map((match) => match.id)
|
|
6430
|
+
...matches.filter((match) => match.type === "node").map((match) => match.id),
|
|
6431
|
+
...matches.filter((match) => match.type === "hyperedge").flatMap((match) => graph.hyperedges.find((hyperedge) => hyperedge.id === match.id)?.nodeIds ?? [])
|
|
5791
6432
|
],
|
|
5792
6433
|
(item) => item
|
|
5793
6434
|
).filter(Boolean);
|
|
@@ -5828,6 +6469,10 @@ function queryGraph(graph, question, searchResults, options) {
|
|
|
5828
6469
|
visitedNodeIds.map((nodeId) => nodes.get(nodeId)?.communityId).filter((communityId) => Boolean(communityId)),
|
|
5829
6470
|
(item) => item
|
|
5830
6471
|
);
|
|
6472
|
+
const hyperedgeIds = uniqueBy(
|
|
6473
|
+
(graph.hyperedges ?? []).filter((hyperedge) => hyperedge.nodeIds.some((nodeId) => visitedNodeIds.includes(nodeId))).map((hyperedge) => hyperedge.id),
|
|
6474
|
+
(item) => item
|
|
6475
|
+
);
|
|
5831
6476
|
return {
|
|
5832
6477
|
question,
|
|
5833
6478
|
traversal,
|
|
@@ -5838,6 +6483,7 @@ function queryGraph(graph, question, searchResults, options) {
|
|
|
5838
6483
|
),
|
|
5839
6484
|
visitedNodeIds,
|
|
5840
6485
|
visitedEdgeIds: [...visitedEdgeIds],
|
|
6486
|
+
hyperedgeIds,
|
|
5841
6487
|
pageIds,
|
|
5842
6488
|
communities,
|
|
5843
6489
|
matches,
|
|
@@ -5845,6 +6491,7 @@ function queryGraph(graph, question, searchResults, options) {
|
|
|
5845
6491
|
`Seeds: ${seeds.join(", ") || "none"}`,
|
|
5846
6492
|
`Visited nodes: ${visitedNodeIds.length}`,
|
|
5847
6493
|
`Visited edges: ${visitedEdgeIds.size}`,
|
|
6494
|
+
`Touched group patterns: ${hyperedgeIds.length}`,
|
|
5848
6495
|
`Communities: ${communities.join(", ") || "none"}`,
|
|
5849
6496
|
`Pages: ${pageIds.join(", ") || "none"}`
|
|
5850
6497
|
].join("\n")
|
|
@@ -5964,11 +6611,13 @@ function explainGraphTarget(graph, target) {
|
|
|
5964
6611
|
page,
|
|
5965
6612
|
community: communityLabel(graph, node.communityId),
|
|
5966
6613
|
neighbors,
|
|
6614
|
+
hyperedges: hyperedgesForNode(graph, node.id),
|
|
5967
6615
|
summary: [
|
|
5968
6616
|
`Node: ${node.label}`,
|
|
5969
6617
|
`Type: ${node.type}`,
|
|
5970
6618
|
`Community: ${node.communityId ?? "none"}`,
|
|
5971
6619
|
`Neighbors: ${neighbors.length}`,
|
|
6620
|
+
`Group patterns: ${hyperedgesForNode(graph, node.id).length}`,
|
|
5972
6621
|
`Page: ${page?.path ?? "none"}`
|
|
5973
6622
|
].join("\n")
|
|
5974
6623
|
};
|
|
@@ -5976,14 +6625,31 @@ function explainGraphTarget(graph, target) {
|
|
|
5976
6625
|
/**
 * Returns the nodes flagged as god nodes, highest degree first.
 *
 * @param {{ nodes: Array<{ isGodNode?: boolean, degree?: number }> }} graph - Graph to inspect.
 * @param {number} [limit=10] - Maximum number of nodes to return.
 * @returns {Array<object>} Up to `limit` god nodes; a missing degree counts as 0.
 */
function topGodNodes(graph, limit = 10) {
  const godNodes = graph.nodes.filter((node) => node.isGodNode);
  godNodes.sort((left, right) => {
    const leftDegree = left.degree ?? 0;
    const rightDegree = right.degree ?? 0;
    return rightDegree - leftDegree;
  });
  return godNodes.slice(0, limit);
}
|
|
6628
|
+
/**
 * Lists hyperedges, optionally restricted to a node or page target.
 *
 * - No target: all hyperedges.
 * - Target resolves to a node: the hyperedges containing that node.
 * - Otherwise the target is matched against page paths (normalized) and page
 *   ids: the hyperedges that cite the page or contain one of its nodes.
 *
 * @param {{ hyperedges?: Array<object>, pages: Array<object> }} graph - Graph to query.
 * @param {string} [target] - Node or page reference.
 * @param {number} [limit=25] - Maximum number of hyperedges to return.
 * @returns {Array<object>} Hyperedges ordered by descending confidence, then
 *   by label; empty when the target matches neither a node nor a page.
 */
function listHyperedges(graph, target, limit = 25) {
  const byConfidenceThenLabel = (left, right) => right.confidence - left.confidence || left.label.localeCompare(right.label);
  if (!target) {
    return [...graph.hyperedges ?? []].sort(byConfidenceThenLabel).slice(0, limit);
  }
  const node = resolveNode(graph, target);
  if (node) {
    return hyperedgesForNode(graph, node.id).slice(0, limit);
  }
  // Normalize the target once instead of once per page.
  const normalizedTarget = normalizeTarget(target);
  const page = graph.pages.find((candidate) => normalizeTarget(candidate.path) === normalizedTarget || candidate.id === target);
  if (!page) {
    return [];
  }
  // Set membership replaces a nested array scan per hyperedge.
  const pageNodeIds = new Set(page.nodeIds);
  return (graph.hyperedges ?? []).filter((hyperedge) => hyperedge.sourcePageIds.includes(page.id) || hyperedge.nodeIds.some((nodeId) => pageNodeIds.has(nodeId))).sort(byConfidenceThenLabel).slice(0, limit);
}
|
|
5979
6642
|
|
|
5980
6643
|
// src/markdown.ts
|
|
5981
|
-
import
|
|
5982
|
-
function
|
|
6644
|
+
import matter5 from "gray-matter";
|
|
6645
|
+
/**
 * Drops falsy entries and de-duplicates the remaining values with `uniqueBy`.
 *
 * @param {Array<string | undefined | null>} values - Candidate values.
 * @returns {string[]} The truthy values after de-duplication.
 */
function uniqueStrings2(values) {
  const present = values.filter((value) => Boolean(value));
  return uniqueBy(present, (value) => value);
}
|
|
6648
|
+
/**
 * Returns a JSON round-tripped copy of a frontmatter value.
 *
 * The round trip removes properties whose value is `undefined` and yields
 * plain JSON data detached from the input.
 *
 * @param {object} value - Frontmatter data.
 * @returns {object} A JSON-safe deep copy.
 */
function safeFrontmatter(value) {
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}
|
|
5985
6651
|
function decoratedTags(baseTags, decorations) {
|
|
5986
|
-
return
|
|
6652
|
+
return uniqueStrings2([
|
|
5987
6653
|
...baseTags,
|
|
5988
6654
|
...(decorations?.projectIds ?? []).map((projectId) => `project/${projectId}`),
|
|
5989
6655
|
...decorations?.extraTags ?? []
|
|
@@ -6062,6 +6728,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
6062
6728
|
page_id: pageId,
|
|
6063
6729
|
kind: "source",
|
|
6064
6730
|
title: analysis.title,
|
|
6731
|
+
...manifest.sourceType ? { source_type: manifest.sourceType } : {},
|
|
6065
6732
|
tags: decoratedTags(analysis.code ? ["source", "code"] : ["source"], decorations),
|
|
6066
6733
|
source_ids: [manifest.sourceId],
|
|
6067
6734
|
project_ids: decorations?.projectIds ?? [],
|
|
@@ -6084,6 +6751,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
6084
6751
|
"",
|
|
6085
6752
|
`Source ID: \`${manifest.sourceId}\``,
|
|
6086
6753
|
manifest.url ? `Source URL: ${manifest.url}` : `Source Path: \`${manifest.originalPath ?? manifest.storedPath}\``,
|
|
6754
|
+
...manifest.sourceType ? [`Source Type: \`${manifest.sourceType}\``, ""] : [""],
|
|
6087
6755
|
"",
|
|
6088
6756
|
"## Summary",
|
|
6089
6757
|
"",
|
|
@@ -6128,6 +6796,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
6128
6796
|
path: relativePath,
|
|
6129
6797
|
title: analysis.title,
|
|
6130
6798
|
kind: "source",
|
|
6799
|
+
sourceType: manifest.sourceType,
|
|
6131
6800
|
sourceIds: [manifest.sourceId],
|
|
6132
6801
|
projectIds: decorations?.projectIds ?? [],
|
|
6133
6802
|
nodeIds,
|
|
@@ -6145,7 +6814,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
|
|
|
6145
6814
|
compiledFrom: metadata.compiledFrom,
|
|
6146
6815
|
managedBy: metadata.managedBy
|
|
6147
6816
|
},
|
|
6148
|
-
content:
|
|
6817
|
+
content: matter5.stringify(body, safeFrontmatter(frontmatter))
|
|
6149
6818
|
};
|
|
6150
6819
|
}
|
|
6151
6820
|
function buildModulePage(input) {
|
|
@@ -6160,7 +6829,7 @@ function buildModulePage(input) {
|
|
|
6160
6829
|
const nodeIds = [code.moduleId, ...code.symbols.map((symbol) => symbol.id)];
|
|
6161
6830
|
const localModuleBacklinks = input.localModules.map((moduleRef) => moduleRef.page.id);
|
|
6162
6831
|
const relatedOutputs = input.relatedOutputs ?? [];
|
|
6163
|
-
const backlinks =
|
|
6832
|
+
const backlinks = uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]);
|
|
6164
6833
|
const importsSection = code.imports.length ? code.imports.map((item) => {
|
|
6165
6834
|
const localModule = item.resolvedSourceId ? input.localModules.find((moduleRef) => moduleRef.sourceId === item.resolvedSourceId && moduleRef.reExport === item.reExport) : void 0;
|
|
6166
6835
|
const importedBits = [
|
|
@@ -6206,9 +6875,9 @@ function buildModulePage(input) {
|
|
|
6206
6875
|
source_hashes: {
|
|
6207
6876
|
[manifest.sourceId]: manifest.contentHash
|
|
6208
6877
|
},
|
|
6209
|
-
related_page_ids:
|
|
6878
|
+
related_page_ids: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
|
|
6210
6879
|
related_node_ids: [],
|
|
6211
|
-
related_source_ids:
|
|
6880
|
+
related_source_ids: uniqueStrings2([
|
|
6212
6881
|
manifest.sourceId,
|
|
6213
6882
|
...input.localModules.map((moduleRef) => moduleRef.sourceId),
|
|
6214
6883
|
...relatedOutputs.flatMap((page) => page.sourceIds)
|
|
@@ -6280,9 +6949,9 @@ function buildModulePage(input) {
|
|
|
6280
6949
|
backlinks,
|
|
6281
6950
|
schemaHash,
|
|
6282
6951
|
sourceHashes: { [manifest.sourceId]: manifest.contentHash },
|
|
6283
|
-
relatedPageIds:
|
|
6952
|
+
relatedPageIds: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
|
|
6284
6953
|
relatedNodeIds: [],
|
|
6285
|
-
relatedSourceIds:
|
|
6954
|
+
relatedSourceIds: uniqueStrings2([
|
|
6286
6955
|
manifest.sourceId,
|
|
6287
6956
|
...input.localModules.map((moduleRef) => moduleRef.sourceId),
|
|
6288
6957
|
...relatedOutputs.flatMap((page) => page.sourceIds)
|
|
@@ -6292,7 +6961,7 @@ function buildModulePage(input) {
|
|
|
6292
6961
|
compiledFrom: metadata.compiledFrom,
|
|
6293
6962
|
managedBy: metadata.managedBy
|
|
6294
6963
|
},
|
|
6295
|
-
content:
|
|
6964
|
+
content: matter5.stringify(body, frontmatter)
|
|
6296
6965
|
};
|
|
6297
6966
|
}
|
|
6298
6967
|
function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes, schemaHash, metadata, relativePath, relatedOutputs = [], decorations) {
|
|
@@ -6363,7 +7032,7 @@ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHash
|
|
|
6363
7032
|
compiledFrom: metadata.compiledFrom,
|
|
6364
7033
|
managedBy: metadata.managedBy
|
|
6365
7034
|
},
|
|
6366
|
-
content:
|
|
7035
|
+
content: matter5.stringify(body, frontmatter)
|
|
6367
7036
|
};
|
|
6368
7037
|
}
|
|
6369
7038
|
function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
|
|
@@ -6439,7 +7108,7 @@ function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
|
|
|
6439
7108
|
}
|
|
6440
7109
|
function buildSectionIndex(kind, pages, schemaHash, metadata, projectIds = []) {
|
|
6441
7110
|
const title = kind.charAt(0).toUpperCase() + kind.slice(1);
|
|
6442
|
-
return
|
|
7111
|
+
return matter5.stringify(
|
|
6443
7112
|
[`# ${title}`, "", ...pages.map((page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`), ""].join("\n"),
|
|
6444
7113
|
{
|
|
6445
7114
|
page_id: `${kind}:index`,
|
|
@@ -6470,38 +7139,219 @@ function nodeSummary(node) {
|
|
|
6470
7139
|
const bridge = typeof node.bridgeScore === "number" ? `bridge=${node.bridgeScore}` : "";
|
|
6471
7140
|
return [node.type, degree, bridge].filter(Boolean).join(", ");
|
|
6472
7141
|
}
|
|
6473
|
-
function
|
|
7142
|
+
function sourceTypeForNode(node, pagesById) {
|
|
7143
|
+
if (!node?.pageId) {
|
|
7144
|
+
return void 0;
|
|
7145
|
+
}
|
|
7146
|
+
return pagesById.get(node.pageId)?.sourceType;
|
|
7147
|
+
}
|
|
7148
|
+
function supportingPathDetails(graph, edge) {
|
|
7149
|
+
const path23 = shortestGraphPath(graph, edge.source, edge.target);
|
|
7150
|
+
const edgesById = new Map(graph.edges.map((item) => [item.id, item]));
|
|
7151
|
+
const pathEdges = path23.edgeIds.map((edgeId) => edgesById.get(edgeId)).filter((item) => Boolean(item));
|
|
7152
|
+
return {
|
|
7153
|
+
pathNodeIds: path23.nodeIds,
|
|
7154
|
+
pathEdgeIds: path23.edgeIds,
|
|
7155
|
+
pathRelations: pathEdges.map((item) => item.relation),
|
|
7156
|
+
pathEvidenceClasses: pathEdges.map((item) => item.evidenceClass),
|
|
7157
|
+
pathSummary: path23.summary
|
|
7158
|
+
};
|
|
7159
|
+
}
|
|
7160
|
+
function surpriseScore(edge, graph, pagesById, hyperedgesByNodeId) {
|
|
6474
7161
|
const nodesById = new Map(graph.nodes.map((node) => [node.id, node]));
|
|
6475
|
-
|
|
6476
|
-
|
|
6477
|
-
|
|
6478
|
-
|
|
6479
|
-
|
|
7162
|
+
const source = nodesById.get(edge.source);
|
|
7163
|
+
const target = nodesById.get(edge.target);
|
|
7164
|
+
const reasons = [];
|
|
7165
|
+
let score = edge.confidence * 0.45;
|
|
7166
|
+
if (source?.communityId && target?.communityId && source.communityId !== target.communityId) {
|
|
7167
|
+
score += 0.18;
|
|
7168
|
+
reasons.push(`it crosses communities ${source.communityId} and ${target.communityId}`);
|
|
7169
|
+
}
|
|
7170
|
+
if (source?.pageId && target?.pageId && source.pageId !== target.pageId) {
|
|
7171
|
+
score += 0.12;
|
|
7172
|
+
reasons.push("it spans different canonical pages");
|
|
7173
|
+
}
|
|
7174
|
+
if (source?.type && target?.type && source.type !== target.type) {
|
|
7175
|
+
score += 0.08;
|
|
7176
|
+
reasons.push(`it bridges ${source.type} and ${target.type} nodes`);
|
|
7177
|
+
}
|
|
7178
|
+
const sourceType = sourceTypeForNode(source, pagesById);
|
|
7179
|
+
const targetType = sourceTypeForNode(target, pagesById);
|
|
7180
|
+
if (sourceType && targetType && sourceType !== targetType) {
|
|
7181
|
+
score += 0.07;
|
|
7182
|
+
reasons.push(`it crosses source types (${sourceType} and ${targetType})`);
|
|
7183
|
+
}
|
|
7184
|
+
if ((source?.bridgeScore ?? 0) > 0 || (target?.bridgeScore ?? 0) > 0) {
|
|
7185
|
+
score += 0.08;
|
|
7186
|
+
reasons.push("a bridge node is involved");
|
|
7187
|
+
}
|
|
7188
|
+
if (edge.relation === "semantically_similar_to") {
|
|
7189
|
+
score += 0.12;
|
|
7190
|
+
reasons.push(describeSimilarityReasons(edge.similarityReasons));
|
|
7191
|
+
}
|
|
7192
|
+
if (edge.evidenceClass === "ambiguous") {
|
|
7193
|
+
score += 0.08;
|
|
7194
|
+
reasons.push("the supporting evidence is ambiguous");
|
|
7195
|
+
}
|
|
7196
|
+
const overlappingHyperedges = (hyperedgesByNodeId.get(edge.source) ?? []).filter((hyperedge) => hyperedge.nodeIds.includes(edge.target));
|
|
7197
|
+
if (overlappingHyperedges.length) {
|
|
7198
|
+
score += 0.06;
|
|
7199
|
+
reasons.push(`it also appears in ${overlappingHyperedges.length} group pattern${overlappingHyperedges.length === 1 ? "" : "s"}`);
|
|
7200
|
+
}
|
|
7201
|
+
const why = normalizeWhitespace(reasons.join("; ")) || "it links graph regions that are otherwise weakly connected";
|
|
7202
|
+
const explanation = normalizeWhitespace(`${source?.label ?? edge.source} connects to ${target?.label ?? edge.target} because ${why}.`);
|
|
7203
|
+
return { score: Math.min(0.99, score), why, explanation };
|
|
7204
|
+
}
|
|
7205
|
+
function topSurprisingConnections(graph, pagesById) {
|
|
7206
|
+
const nodesById = new Map(graph.nodes.map((node) => [node.id, node]));
|
|
7207
|
+
const hyperedgesByNodeId = /* @__PURE__ */ new Map();
|
|
7208
|
+
for (const hyperedge of graph.hyperedges ?? []) {
|
|
7209
|
+
for (const nodeId of hyperedge.nodeIds) {
|
|
7210
|
+
if (!hyperedgesByNodeId.has(nodeId)) {
|
|
7211
|
+
hyperedgesByNodeId.set(nodeId, []);
|
|
7212
|
+
}
|
|
7213
|
+
hyperedgesByNodeId.get(nodeId)?.push(hyperedge);
|
|
7214
|
+
}
|
|
7215
|
+
}
|
|
7216
|
+
return uniqueBy(
|
|
7217
|
+
graph.edges.filter((edge) => {
|
|
7218
|
+
const source = nodesById.get(edge.source);
|
|
7219
|
+
const target = nodesById.get(edge.target);
|
|
7220
|
+
return Boolean(
|
|
7221
|
+
source?.communityId && target?.communityId && source.communityId !== target.communityId || edge.relation === "semantically_similar_to" || edge.evidenceClass === "ambiguous" || source?.type && target?.type && source.type !== target.type
|
|
7222
|
+
);
|
|
7223
|
+
}).map((edge) => {
|
|
7224
|
+
const source = nodesById.get(edge.source);
|
|
7225
|
+
const target = nodesById.get(edge.target);
|
|
7226
|
+
const path23 = supportingPathDetails(graph, edge);
|
|
7227
|
+
const scored = surpriseScore(edge, graph, pagesById, hyperedgesByNodeId);
|
|
7228
|
+
return {
|
|
7229
|
+
id: edge.id,
|
|
7230
|
+
sourceNodeId: edge.source,
|
|
7231
|
+
sourceLabel: source?.label ?? edge.source,
|
|
7232
|
+
targetNodeId: edge.target,
|
|
7233
|
+
targetLabel: target?.label ?? edge.target,
|
|
7234
|
+
relation: edge.relation,
|
|
7235
|
+
evidenceClass: edge.evidenceClass,
|
|
7236
|
+
confidence: edge.confidence,
|
|
7237
|
+
pathNodeIds: path23.pathNodeIds,
|
|
7238
|
+
pathEdgeIds: path23.pathEdgeIds,
|
|
7239
|
+
pathRelations: path23.pathRelations,
|
|
7240
|
+
pathEvidenceClasses: path23.pathEvidenceClasses,
|
|
7241
|
+
pathSummary: path23.pathSummary,
|
|
7242
|
+
why: scored.why,
|
|
7243
|
+
explanation: scored.explanation,
|
|
7244
|
+
surpriseScore: scored.score
|
|
7245
|
+
};
|
|
7246
|
+
}).sort(
|
|
7247
|
+
(left, right) => right.surpriseScore - left.surpriseScore || right.confidence - left.confidence || left.id.localeCompare(right.id)
|
|
7248
|
+
).slice(0, 8),
|
|
7249
|
+
(connection) => connection.id
|
|
7250
|
+
).map(({ surpriseScore: _surpriseScore, ...connection }) => connection);
|
|
7251
|
+
}
|
|
7252
|
+
function topGroupPatterns(graph) {
|
|
7253
|
+
return [...graph.hyperedges ?? []].sort(
|
|
7254
|
+
(left, right) => right.confidence - left.confidence || right.nodeIds.length - left.nodeIds.length || left.label.localeCompare(right.label)
|
|
7255
|
+
).slice(0, 8);
|
|
6480
7256
|
}
|
|
6481
7257
|
function suggestedGraphQuestions(graph) {
|
|
6482
7258
|
const thinCommunities = (graph.communities ?? []).filter((community) => community.nodeIds.length <= 2);
|
|
6483
7259
|
const bridgeNodes = graph.nodes.filter((node) => (node.bridgeScore ?? 0) > 0).sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0)).slice(0, 3);
|
|
6484
|
-
return
|
|
7260
|
+
return uniqueStrings2([
|
|
6485
7261
|
...thinCommunities.map((community) => `What sources would strengthen community ${community.label}?`),
|
|
6486
7262
|
...bridgeNodes.map((node) => `Why does ${node.label} connect multiple communities in the vault?`)
|
|
6487
7263
|
]).slice(0, 6);
|
|
6488
7264
|
}
|
|
7265
|
+
function buildGraphReportArtifact(input) {
|
|
7266
|
+
const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
|
|
7267
|
+
const godNodes = input.graph.nodes.filter((node) => node.isGodNode).sort((left, right) => (right.degree ?? 0) - (left.degree ?? 0)).slice(0, 8);
|
|
7268
|
+
const bridgeNodes = input.graph.nodes.filter((node) => (node.bridgeScore ?? 0) > 0).sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0)).slice(0, 8);
|
|
7269
|
+
const thinCommunities = (input.graph.communities ?? []).filter((community) => community.nodeIds.length <= 2).map((community) => {
|
|
7270
|
+
const page = input.communityPages.find((candidate) => candidate.id === `graph:${community.id}`);
|
|
7271
|
+
return {
|
|
7272
|
+
id: community.id,
|
|
7273
|
+
label: community.label,
|
|
7274
|
+
nodeCount: community.nodeIds.length,
|
|
7275
|
+
pageId: page?.id,
|
|
7276
|
+
path: page?.path,
|
|
7277
|
+
title: page?.title
|
|
7278
|
+
};
|
|
7279
|
+
});
|
|
7280
|
+
const surprisingConnections = topSurprisingConnections(input.graph, pagesById);
|
|
7281
|
+
const groupPatterns = topGroupPatterns(input.graph);
|
|
7282
|
+
return {
|
|
7283
|
+
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7284
|
+
graphHash: input.graphHash,
|
|
7285
|
+
overview: {
|
|
7286
|
+
nodes: input.graph.nodes.length,
|
|
7287
|
+
edges: input.graph.edges.length,
|
|
7288
|
+
pages: input.graph.pages.length,
|
|
7289
|
+
communities: input.graph.communities?.length ?? 0
|
|
7290
|
+
},
|
|
7291
|
+
benchmark: input.benchmark ? {
|
|
7292
|
+
generatedAt: input.benchmark.generatedAt,
|
|
7293
|
+
stale: input.benchmarkStale ?? false,
|
|
7294
|
+
summary: input.benchmark.summary,
|
|
7295
|
+
questionCount: input.benchmark.sampleQuestions.length
|
|
7296
|
+
} : void 0,
|
|
7297
|
+
godNodes: godNodes.map((node) => ({
|
|
7298
|
+
nodeId: node.id,
|
|
7299
|
+
label: node.label,
|
|
7300
|
+
pageId: node.pageId,
|
|
7301
|
+
degree: node.degree,
|
|
7302
|
+
bridgeScore: node.bridgeScore
|
|
7303
|
+
})),
|
|
7304
|
+
bridgeNodes: bridgeNodes.map((node) => ({
|
|
7305
|
+
nodeId: node.id,
|
|
7306
|
+
label: node.label,
|
|
7307
|
+
pageId: node.pageId,
|
|
7308
|
+
degree: node.degree,
|
|
7309
|
+
bridgeScore: node.bridgeScore
|
|
7310
|
+
})),
|
|
7311
|
+
thinCommunities,
|
|
7312
|
+
surprisingConnections,
|
|
7313
|
+
groupPatterns,
|
|
7314
|
+
suggestedQuestions: suggestedGraphQuestions(input.graph),
|
|
7315
|
+
communityPages: input.communityPages.map((page) => ({
|
|
7316
|
+
id: page.id,
|
|
7317
|
+
path: page.path,
|
|
7318
|
+
title: page.title
|
|
7319
|
+
})),
|
|
7320
|
+
recentResearchSources: (input.recentResearchSources ?? []).map((page) => ({
|
|
7321
|
+
pageId: page.id,
|
|
7322
|
+
path: page.path,
|
|
7323
|
+
title: page.title,
|
|
7324
|
+
sourceType: page.sourceType,
|
|
7325
|
+
updatedAt: page.updatedAt
|
|
7326
|
+
}))
|
|
7327
|
+
};
|
|
7328
|
+
}
|
|
6489
7329
|
function buildGraphReportPage(input) {
|
|
6490
7330
|
const pageId = "graph:report";
|
|
6491
7331
|
const pathValue = pagePathFor("graph_report", "report");
|
|
6492
7332
|
const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
|
|
6493
7333
|
const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
|
|
6494
|
-
const
|
|
6495
|
-
|
|
6496
|
-
|
|
6497
|
-
|
|
6498
|
-
|
|
6499
|
-
|
|
6500
|
-
|
|
6501
|
-
|
|
6502
|
-
...input.
|
|
7334
|
+
const relatedNodeIds = uniqueStrings2([
|
|
7335
|
+
...input.report.godNodes.map((node) => node.nodeId),
|
|
7336
|
+
...input.report.bridgeNodes.map((node) => node.nodeId),
|
|
7337
|
+
...input.report.surprisingConnections.flatMap((connection) => [
|
|
7338
|
+
connection.sourceNodeId,
|
|
7339
|
+
connection.targetNodeId,
|
|
7340
|
+
...connection.pathNodeIds
|
|
7341
|
+
]),
|
|
7342
|
+
...input.report.groupPatterns.flatMap((hyperedge) => hyperedge.nodeIds)
|
|
7343
|
+
]);
|
|
7344
|
+
const relatedPageIds = uniqueStrings2([
|
|
7345
|
+
...input.report.godNodes.map((node) => node.pageId ?? ""),
|
|
7346
|
+
...input.report.bridgeNodes.map((node) => node.pageId ?? ""),
|
|
7347
|
+
...input.report.communityPages.map((page) => page.id),
|
|
7348
|
+
...input.report.recentResearchSources.map((page) => page.pageId),
|
|
7349
|
+
...input.report.groupPatterns.flatMap((hyperedge) => hyperedge.sourcePageIds)
|
|
7350
|
+
]);
|
|
7351
|
+
const relatedSourceIds = uniqueStrings2([
|
|
7352
|
+
...relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []),
|
|
7353
|
+
...input.report.recentResearchSources.flatMap((page) => pagesById.get(page.pageId)?.sourceIds ?? [])
|
|
6503
7354
|
]);
|
|
6504
|
-
const relatedSourceIds = uniqueStrings(relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []));
|
|
6505
7355
|
const frontmatter = {
|
|
6506
7356
|
page_id: pageId,
|
|
6507
7357
|
kind: "graph_report",
|
|
@@ -6529,47 +7379,73 @@ function buildGraphReportPage(input) {
|
|
|
6529
7379
|
"",
|
|
6530
7380
|
"## Overview",
|
|
6531
7381
|
"",
|
|
6532
|
-
`- Nodes: ${input.
|
|
6533
|
-
`- Edges: ${input.
|
|
6534
|
-
`- Pages: ${input.
|
|
6535
|
-
`- Communities: ${input.
|
|
7382
|
+
`- Nodes: ${input.report.overview.nodes}`,
|
|
7383
|
+
`- Edges: ${input.report.overview.edges}`,
|
|
7384
|
+
`- Pages: ${input.report.overview.pages}`,
|
|
7385
|
+
`- Communities: ${input.report.overview.communities}`,
|
|
6536
7386
|
"",
|
|
6537
|
-
|
|
6538
|
-
|
|
6539
|
-
|
|
6540
|
-
`-
|
|
6541
|
-
`-
|
|
6542
|
-
`-
|
|
6543
|
-
`-
|
|
7387
|
+
"## Benchmark Summary",
|
|
7388
|
+
"",
|
|
7389
|
+
...input.report.benchmark ? [
|
|
7390
|
+
`- Generated At: ${input.report.benchmark.generatedAt}`,
|
|
7391
|
+
`- Status: ${input.report.benchmark.stale ? "Stale (graph changed since benchmark ran)" : "Fresh"}`,
|
|
7392
|
+
`- Naive Corpus Tokens: ${input.report.benchmark.summary.naiveCorpusTokens}`,
|
|
7393
|
+
`- Final Context Tokens: ${input.report.benchmark.summary.finalContextTokens}`,
|
|
7394
|
+
`- Unique Nodes Considered: ${input.report.benchmark.summary.uniqueVisitedNodes}`,
|
|
7395
|
+
`- Reduction Ratio: ${(input.report.benchmark.summary.reductionRatio * 100).toFixed(1)}%`,
|
|
7396
|
+
`- Questions: ${input.report.benchmark.questionCount}`,
|
|
6544
7397
|
""
|
|
6545
|
-
] : [],
|
|
6546
|
-
"## God Nodes",
|
|
7398
|
+
] : ["- No benchmark results yet.", ""],
|
|
7399
|
+
"## Top God Nodes",
|
|
6547
7400
|
"",
|
|
6548
|
-
...godNodes.length ? godNodes.map((node) =>
|
|
7401
|
+
...input.report.godNodes.length ? input.report.godNodes.map((node) => {
|
|
7402
|
+
const graphNode = nodesById.get(node.nodeId);
|
|
7403
|
+
return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
|
|
7404
|
+
}) : ["- No high-connectivity nodes detected."],
|
|
6549
7405
|
"",
|
|
6550
|
-
"## Bridge Nodes",
|
|
7406
|
+
"## Top Bridge Nodes",
|
|
6551
7407
|
"",
|
|
6552
|
-
...bridgeNodes.length ? bridgeNodes.map((node) =>
|
|
7408
|
+
...input.report.bridgeNodes.length ? input.report.bridgeNodes.map((node) => {
|
|
7409
|
+
const graphNode = nodesById.get(node.nodeId);
|
|
7410
|
+
return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
|
|
7411
|
+
}) : ["- No cross-community bridge nodes detected."],
|
|
6553
7412
|
"",
|
|
6554
7413
|
"## Communities",
|
|
6555
7414
|
"",
|
|
6556
|
-
...input.communityPages.length ? input.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
|
|
7415
|
+
...input.report.communityPages.length ? input.report.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
|
|
6557
7416
|
"",
|
|
6558
|
-
"## Thin
|
|
7417
|
+
"## Thin Or Underlinked Areas",
|
|
6559
7418
|
"",
|
|
6560
|
-
...thinCommunities.length ? thinCommunities.map(
|
|
7419
|
+
...input.report.thinCommunities.length ? input.report.thinCommunities.map(
|
|
7420
|
+
(community) => community.path ? `- [[${community.path.replace(/\.md$/, "")}|${community.title ?? community.label}]] (${community.nodeCount} node(s))` : `- ${community.label} (${community.nodeCount} node(s))`
|
|
7421
|
+
) : ["- No thin communities detected."],
|
|
6561
7422
|
"",
|
|
6562
|
-
"##
|
|
7423
|
+
"## Surprising Connections",
|
|
6563
7424
|
"",
|
|
6564
|
-
...
|
|
6565
|
-
const source = nodesById.get(
|
|
6566
|
-
const target = nodesById.get(
|
|
6567
|
-
|
|
7425
|
+
...input.report.surprisingConnections.length ? input.report.surprisingConnections.map((connection) => {
|
|
7426
|
+
const source = nodesById.get(connection.sourceNodeId);
|
|
7427
|
+
const target = nodesById.get(connection.targetNodeId);
|
|
7428
|
+
const sourceLabel = source ? graphNodeLink(source, pagesById) : `\`${connection.sourceNodeId}\``;
|
|
7429
|
+
const targetLabel = target ? graphNodeLink(target, pagesById) : `\`${connection.targetNodeId}\``;
|
|
7430
|
+
return `- ${sourceLabel} ${connection.relation} ${targetLabel} (${connection.evidenceClass}, ${connection.confidence.toFixed(2)}). Why: ${connection.why}. ${connection.explanation} Path: ${connection.pathSummary}.`;
|
|
6568
7431
|
}) : ["- No cross-community links detected."],
|
|
6569
7432
|
"",
|
|
6570
|
-
"##
|
|
7433
|
+
"## Group Patterns",
|
|
7434
|
+
"",
|
|
7435
|
+
...input.report.groupPatterns.length ? input.report.groupPatterns.map((hyperedge) => {
|
|
7436
|
+
const linkedNodes = hyperedge.nodeIds.map((nodeId) => nodesById.get(nodeId)).filter((node) => Boolean(node)).map((node) => graphNodeLink(node, pagesById)).join(", ");
|
|
7437
|
+
return `- ${hyperedge.label} (${hyperedge.relation}, ${hyperedge.evidenceClass}, ${hyperedge.confidence.toFixed(2)}). ${hyperedge.why} Members: ${linkedNodes}.`;
|
|
7438
|
+
}) : ["- No multi-node group patterns detected."],
|
|
7439
|
+
"",
|
|
7440
|
+
"## New Research Sources",
|
|
7441
|
+
"",
|
|
7442
|
+
...input.report.recentResearchSources.length ? input.report.recentResearchSources.map(
|
|
7443
|
+
(page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]] (\`${page.sourceType}\`, updated ${page.updatedAt})`
|
|
7444
|
+
) : ["- No newly captured research sources since the previous compile."],
|
|
7445
|
+
"",
|
|
7446
|
+
"## Suggested Questions",
|
|
6571
7447
|
"",
|
|
6572
|
-
...
|
|
7448
|
+
...input.report.suggestedQuestions.map((question) => `- ${question}`),
|
|
6573
7449
|
""
|
|
6574
7450
|
].join("\n");
|
|
6575
7451
|
return {
|
|
@@ -6595,7 +7471,7 @@ function buildGraphReportPage(input) {
|
|
|
6595
7471
|
compiledFrom: input.metadata.compiledFrom,
|
|
6596
7472
|
managedBy: input.metadata.managedBy
|
|
6597
7473
|
},
|
|
6598
|
-
content:
|
|
7474
|
+
content: matter5.stringify(body, frontmatter)
|
|
6599
7475
|
};
|
|
6600
7476
|
}
|
|
6601
7477
|
function buildCommunitySummaryPage(input) {
|
|
@@ -6604,14 +7480,14 @@ function buildCommunitySummaryPage(input) {
|
|
|
6604
7480
|
const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
|
|
6605
7481
|
const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
|
|
6606
7482
|
const communityNodes = input.community.nodeIds.map((nodeId) => nodesById.get(nodeId)).filter((node) => Boolean(node));
|
|
6607
|
-
const communityPageIds =
|
|
7483
|
+
const communityPageIds = uniqueStrings2(communityNodes.map((node) => node.pageId ?? ""));
|
|
6608
7484
|
const communityPages = communityPageIds.map((id) => pagesById.get(id)).filter((page) => Boolean(page));
|
|
6609
7485
|
const externalEdges = input.graph.edges.filter((edge) => {
|
|
6610
7486
|
const source = nodesById.get(edge.source);
|
|
6611
7487
|
const target = nodesById.get(edge.target);
|
|
6612
7488
|
return source?.communityId === input.community.id && target?.communityId && target.communityId !== input.community.id;
|
|
6613
7489
|
}).slice(0, 8);
|
|
6614
|
-
const relatedSourceIds =
|
|
7490
|
+
const relatedSourceIds = uniqueStrings2(communityNodes.flatMap((node) => node.sourceIds));
|
|
6615
7491
|
const frontmatter = {
|
|
6616
7492
|
page_id: pageId,
|
|
6617
7493
|
kind: "community_summary",
|
|
@@ -6630,7 +7506,7 @@ function buildCommunitySummaryPage(input) {
|
|
|
6630
7506
|
backlinks: ["graph:report"],
|
|
6631
7507
|
schema_hash: input.schemaHash,
|
|
6632
7508
|
source_hashes: {},
|
|
6633
|
-
related_page_ids:
|
|
7509
|
+
related_page_ids: uniqueStrings2(["graph:report", ...communityPageIds]),
|
|
6634
7510
|
related_node_ids: input.community.nodeIds,
|
|
6635
7511
|
related_source_ids: relatedSourceIds
|
|
6636
7512
|
};
|
|
@@ -6669,7 +7545,7 @@ function buildCommunitySummaryPage(input) {
|
|
|
6669
7545
|
backlinks: ["graph:report"],
|
|
6670
7546
|
schemaHash: input.schemaHash,
|
|
6671
7547
|
sourceHashes: {},
|
|
6672
|
-
relatedPageIds:
|
|
7548
|
+
relatedPageIds: uniqueStrings2(["graph:report", ...communityPageIds]),
|
|
6673
7549
|
relatedNodeIds: input.community.nodeIds,
|
|
6674
7550
|
relatedSourceIds,
|
|
6675
7551
|
createdAt: input.metadata.createdAt,
|
|
@@ -6677,11 +7553,11 @@ function buildCommunitySummaryPage(input) {
|
|
|
6677
7553
|
compiledFrom: input.metadata.compiledFrom,
|
|
6678
7554
|
managedBy: input.metadata.managedBy
|
|
6679
7555
|
},
|
|
6680
|
-
content:
|
|
7556
|
+
content: matter5.stringify(body, frontmatter)
|
|
6681
7557
|
};
|
|
6682
7558
|
}
|
|
6683
7559
|
function buildProjectsIndex(projectPages, schemaHash, metadata) {
|
|
6684
|
-
return
|
|
7560
|
+
return matter5.stringify(
|
|
6685
7561
|
[
|
|
6686
7562
|
"# Projects",
|
|
6687
7563
|
"",
|
|
@@ -6711,7 +7587,7 @@ function buildProjectsIndex(projectPages, schemaHash, metadata) {
|
|
|
6711
7587
|
}
|
|
6712
7588
|
function buildProjectIndex(input) {
|
|
6713
7589
|
const title = `Project: ${input.projectId}`;
|
|
6714
|
-
return
|
|
7590
|
+
return matter5.stringify(
|
|
6715
7591
|
[
|
|
6716
7592
|
`# ${title}`,
|
|
6717
7593
|
"",
|
|
@@ -6824,7 +7700,7 @@ function buildOutputPage(input) {
|
|
|
6824
7700
|
outputFormat: input.outputFormat,
|
|
6825
7701
|
outputAssets
|
|
6826
7702
|
},
|
|
6827
|
-
content:
|
|
7703
|
+
content: matter5.stringify(
|
|
6828
7704
|
(input.outputFormat === "slides" ? [
|
|
6829
7705
|
input.answer,
|
|
6830
7706
|
"",
|
|
@@ -6950,7 +7826,7 @@ function buildExploreHubPage(input) {
|
|
|
6950
7826
|
outputFormat: input.outputFormat,
|
|
6951
7827
|
outputAssets
|
|
6952
7828
|
},
|
|
6953
|
-
content:
|
|
7829
|
+
content: matter5.stringify(
|
|
6954
7830
|
(input.outputFormat === "slides" ? [
|
|
6955
7831
|
`# ${title}`,
|
|
6956
7832
|
"",
|
|
@@ -7216,12 +8092,12 @@ function buildOutputAssetManifest(input) {
|
|
|
7216
8092
|
// src/outputs.ts
|
|
7217
8093
|
import fs13 from "fs/promises";
|
|
7218
8094
|
import path16 from "path";
|
|
7219
|
-
import
|
|
8095
|
+
import matter7 from "gray-matter";
|
|
7220
8096
|
|
|
7221
8097
|
// src/pages.ts
|
|
7222
8098
|
import fs12 from "fs/promises";
|
|
7223
8099
|
import path15 from "path";
|
|
7224
|
-
import
|
|
8100
|
+
import matter6 from "gray-matter";
|
|
7225
8101
|
function normalizeStringArray(value) {
|
|
7226
8102
|
return Array.isArray(value) ? value.filter((item) => typeof item === "string") : [];
|
|
7227
8103
|
}
|
|
@@ -7242,6 +8118,9 @@ function normalizePageStatus(value, fallback = "active") {
|
|
|
7242
8118
|
function normalizePageManager(value, fallback = "system") {
|
|
7243
8119
|
return value === "human" || value === "system" ? value : fallback;
|
|
7244
8120
|
}
|
|
8121
|
+
function normalizeSourceType(value) {
|
|
8122
|
+
return value === "arxiv" || value === "doi" || value === "tweet" || value === "article" || value === "url" ? value : void 0;
|
|
8123
|
+
}
|
|
7245
8124
|
function normalizeOutputFormat(value, fallback = "markdown") {
|
|
7246
8125
|
return value === "report" || value === "slides" || value === "chart" || value === "image" ? value : fallback;
|
|
7247
8126
|
}
|
|
@@ -7293,7 +8172,7 @@ async function loadExistingManagedPageState(absolutePath, defaults = {}) {
|
|
|
7293
8172
|
};
|
|
7294
8173
|
}
|
|
7295
8174
|
const content = await fs12.readFile(absolutePath, "utf8");
|
|
7296
|
-
const parsed =
|
|
8175
|
+
const parsed = matter6(content);
|
|
7297
8176
|
return {
|
|
7298
8177
|
status: normalizePageStatus(parsed.data.status, defaults.status ?? "active"),
|
|
7299
8178
|
managedBy: normalizePageManager(parsed.data.managed_by, defaults.managedBy ?? "system"),
|
|
@@ -7327,7 +8206,7 @@ function inferPageKind(relativePath, explicitKind = void 0) {
|
|
|
7327
8206
|
return "index";
|
|
7328
8207
|
}
|
|
7329
8208
|
function parseStoredPage(relativePath, content, defaults = {}) {
|
|
7330
|
-
const parsed =
|
|
8209
|
+
const parsed = matter6(content);
|
|
7331
8210
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
7332
8211
|
const fallbackCreatedAt = defaults.createdAt ?? now;
|
|
7333
8212
|
const fallbackUpdatedAt = defaults.updatedAt ?? fallbackCreatedAt;
|
|
@@ -7347,6 +8226,7 @@ function parseStoredPage(relativePath, content, defaults = {}) {
|
|
|
7347
8226
|
path: relativePath,
|
|
7348
8227
|
title,
|
|
7349
8228
|
kind,
|
|
8229
|
+
sourceType: normalizeSourceType(parsed.data.source_type),
|
|
7350
8230
|
sourceIds,
|
|
7351
8231
|
projectIds,
|
|
7352
8232
|
nodeIds,
|
|
@@ -7379,7 +8259,7 @@ async function loadInsightPages(wikiDir) {
|
|
|
7379
8259
|
for (const absolutePath of files) {
|
|
7380
8260
|
const relativePath = toPosix(path15.relative(wikiDir, absolutePath));
|
|
7381
8261
|
const content = await fs12.readFile(absolutePath, "utf8");
|
|
7382
|
-
const parsed =
|
|
8262
|
+
const parsed = matter6(content);
|
|
7383
8263
|
const stats = await fs12.stat(absolutePath);
|
|
7384
8264
|
const title = typeof parsed.data.title === "string" ? parsed.data.title : path15.basename(absolutePath, ".md");
|
|
7385
8265
|
const sourceIds = normalizeStringArray(parsed.data.source_ids);
|
|
@@ -7464,7 +8344,7 @@ async function loadSavedOutputPages(wikiDir) {
|
|
|
7464
8344
|
const relativePath = path16.posix.join("outputs", entry.name);
|
|
7465
8345
|
const absolutePath = path16.join(outputsDir, entry.name);
|
|
7466
8346
|
const content = await fs13.readFile(absolutePath, "utf8");
|
|
7467
|
-
const parsed =
|
|
8347
|
+
const parsed = matter7(content);
|
|
7468
8348
|
const slug = entry.name.replace(/\.md$/, "");
|
|
7469
8349
|
const title = typeof parsed.data.title === "string" ? parsed.data.title : slug;
|
|
7470
8350
|
const pageId = typeof parsed.data.page_id === "string" ? parsed.data.page_id : `output:${slug}`;
|
|
@@ -7516,7 +8396,7 @@ async function loadSavedOutputPages(wikiDir) {
|
|
|
7516
8396
|
// src/search.ts
|
|
7517
8397
|
import fs14 from "fs/promises";
|
|
7518
8398
|
import path17 from "path";
|
|
7519
|
-
import
|
|
8399
|
+
import matter8 from "gray-matter";
|
|
7520
8400
|
function getDatabaseSync() {
|
|
7521
8401
|
const builtin = process.getBuiltinModule?.("node:sqlite");
|
|
7522
8402
|
if (!builtin?.DatabaseSync) {
|
|
@@ -7534,6 +8414,9 @@ function normalizeKind(value) {
|
|
|
7534
8414
|
function normalizeStatus(value) {
|
|
7535
8415
|
return value === "draft" || value === "candidate" || value === "active" || value === "archived" ? value : void 0;
|
|
7536
8416
|
}
|
|
8417
|
+
function normalizeSourceType2(value) {
|
|
8418
|
+
return value === "arxiv" || value === "doi" || value === "tweet" || value === "article" || value === "url" ? value : void 0;
|
|
8419
|
+
}
|
|
7537
8420
|
async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
7538
8421
|
await ensureDir(path17.dirname(dbPath));
|
|
7539
8422
|
const DatabaseSync = getDatabaseSync();
|
|
@@ -7549,6 +8432,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
7549
8432
|
body TEXT NOT NULL,
|
|
7550
8433
|
kind TEXT NOT NULL,
|
|
7551
8434
|
status TEXT NOT NULL,
|
|
8435
|
+
source_type TEXT NOT NULL,
|
|
7552
8436
|
project_ids TEXT NOT NULL,
|
|
7553
8437
|
project_key TEXT NOT NULL
|
|
7554
8438
|
);
|
|
@@ -7562,12 +8446,12 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
7562
8446
|
DELETE FROM pages;
|
|
7563
8447
|
`);
|
|
7564
8448
|
const insertPage = db.prepare(
|
|
7565
|
-
"INSERT INTO pages (id, path, title, body, kind, status, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
|
|
8449
|
+
"INSERT INTO pages (id, path, title, body, kind, status, source_type, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
|
7566
8450
|
);
|
|
7567
8451
|
for (const page of pages) {
|
|
7568
8452
|
const absolutePath = path17.join(wikiDir, page.path);
|
|
7569
8453
|
const content = await fs14.readFile(absolutePath, "utf8");
|
|
7570
|
-
const parsed =
|
|
8454
|
+
const parsed = matter8(content);
|
|
7571
8455
|
insertPage.run(
|
|
7572
8456
|
page.id,
|
|
7573
8457
|
page.path,
|
|
@@ -7575,6 +8459,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
|
|
|
7575
8459
|
parsed.content,
|
|
7576
8460
|
page.kind,
|
|
7577
8461
|
page.status,
|
|
8462
|
+
typeof parsed.data.source_type === "string" ? parsed.data.source_type : "",
|
|
7578
8463
|
JSON.stringify(page.projectIds),
|
|
7579
8464
|
page.projectIds.map((projectId) => `|${projectId}|`).join("")
|
|
7580
8465
|
);
|
|
@@ -7608,6 +8493,10 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
|
|
|
7608
8493
|
params.push(`%|${options.project}|%`);
|
|
7609
8494
|
}
|
|
7610
8495
|
}
|
|
8496
|
+
if (options.sourceType && options.sourceType !== "all") {
|
|
8497
|
+
clauses.push("pages.source_type = ?");
|
|
8498
|
+
params.push(options.sourceType);
|
|
8499
|
+
}
|
|
7611
8500
|
const statement = db.prepare(`
|
|
7612
8501
|
SELECT
|
|
7613
8502
|
pages.id AS pageId,
|
|
@@ -7615,6 +8504,7 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
|
|
|
7615
8504
|
pages.title AS title,
|
|
7616
8505
|
pages.kind AS kind,
|
|
7617
8506
|
pages.status AS status,
|
|
8507
|
+
pages.source_type AS sourceType,
|
|
7618
8508
|
pages.project_ids AS projectIds,
|
|
7619
8509
|
snippet(page_search, 1, '[', ']', '...', 16) AS snippet,
|
|
7620
8510
|
bm25(page_search) AS rank
|
|
@@ -7642,13 +8532,14 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
|
|
|
7642
8532
|
title: String(row.title ?? ""),
|
|
7643
8533
|
kind: normalizeKind(row.kind),
|
|
7644
8534
|
status: normalizeStatus(row.status),
|
|
8535
|
+
sourceType: normalizeSourceType2(row.sourceType),
|
|
7645
8536
|
snippet: String(row.snippet ?? ""),
|
|
7646
8537
|
rank: Number(row.rank ?? 0)
|
|
7647
8538
|
}));
|
|
7648
8539
|
}
|
|
7649
8540
|
|
|
7650
8541
|
// src/vault.ts
|
|
7651
|
-
function
|
|
8542
|
+
function uniqueStrings3(values) {
|
|
7652
8543
|
return uniqueBy(values.filter(Boolean), (value) => value);
|
|
7653
8544
|
}
|
|
7654
8545
|
function normalizeOutputFormat2(format) {
|
|
@@ -7809,7 +8700,7 @@ async function resolveImageGenerationProvider(rootDir) {
|
|
|
7809
8700
|
if (!providerConfig) {
|
|
7810
8701
|
throw new Error(`No provider configured with id "${preferredProviderId}" for task "imageProvider".`);
|
|
7811
8702
|
}
|
|
7812
|
-
const { createProvider: createProvider2 } = await import("./registry-
|
|
8703
|
+
const { createProvider: createProvider2 } = await import("./registry-6KZMA3XM.js");
|
|
7813
8704
|
return createProvider2(preferredProviderId, providerConfig, rootDir);
|
|
7814
8705
|
}
|
|
7815
8706
|
async function generateOutputArtifacts(rootDir, input) {
|
|
@@ -8013,7 +8904,7 @@ function normalizeProjectRoot(root) {
|
|
|
8013
8904
|
function projectEntries(config) {
|
|
8014
8905
|
return Object.entries(config.projects ?? {}).map(([id, project]) => ({
|
|
8015
8906
|
id,
|
|
8016
|
-
roots:
|
|
8907
|
+
roots: uniqueStrings3(project.roots.map(normalizeProjectRoot)).filter(Boolean),
|
|
8017
8908
|
schemaPath: project.schemaPath
|
|
8018
8909
|
})).sort((left, right) => left.id.localeCompare(right.id));
|
|
8019
8910
|
}
|
|
@@ -8061,11 +8952,11 @@ function resolveSourceProjects(rootDir, manifests, config) {
|
|
|
8061
8952
|
return Object.fromEntries(manifests.map((manifest) => [manifest.sourceId, resolveSourceProjectId(rootDir, manifest, config)]));
|
|
8062
8953
|
}
|
|
8063
8954
|
function scopedProjectIdsFromSources(sourceIds, sourceProjects) {
|
|
8064
|
-
const projectIds =
|
|
8955
|
+
const projectIds = uniqueStrings3(sourceIds.map((sourceId) => sourceProjects[sourceId] ?? "").filter(Boolean));
|
|
8065
8956
|
return projectIds.length === 1 ? projectIds : [];
|
|
8066
8957
|
}
|
|
8067
8958
|
function schemaProjectIdsFromPages(pageIds, pageMap2) {
|
|
8068
|
-
return
|
|
8959
|
+
return uniqueStrings3(
|
|
8069
8960
|
pageIds.flatMap((pageId) => pageMap2.get(pageId)?.projectIds ?? []).filter(Boolean).sort((left, right) => left.localeCompare(right))
|
|
8070
8961
|
);
|
|
8071
8962
|
}
|
|
@@ -8074,7 +8965,7 @@ function categoryTagsForSchema(schema, texts) {
|
|
|
8074
8965
|
if (!haystack) {
|
|
8075
8966
|
return [];
|
|
8076
8967
|
}
|
|
8077
|
-
return
|
|
8968
|
+
return uniqueStrings3(
|
|
8078
8969
|
schemaCategoryLabels({ path: "", hash: "", content: schema.content }).filter((label) => haystack.includes(label.toLowerCase())).map((label) => `category/${slugify(label)}`)
|
|
8079
8970
|
).slice(0, 3);
|
|
8080
8971
|
}
|
|
@@ -8285,7 +9176,7 @@ async function buildManagedContent(absolutePath, defaults, build) {
|
|
|
8285
9176
|
return content;
|
|
8286
9177
|
}
|
|
8287
9178
|
function indexCompiledFrom(pages) {
|
|
8288
|
-
return
|
|
9179
|
+
return uniqueStrings3(pages.flatMap((page) => page.sourceIds));
|
|
8289
9180
|
}
|
|
8290
9181
|
function deriveGraphMetrics(nodes, edges) {
|
|
8291
9182
|
const adjacency = /* @__PURE__ */ new Map();
|
|
@@ -8678,17 +9569,42 @@ function buildGraph(manifests, analyses, pages, sourceProjects, _codeIndex) {
|
|
|
8678
9569
|
...conceptMap.values(),
|
|
8679
9570
|
...entityMap.values()
|
|
8680
9571
|
];
|
|
8681
|
-
const
|
|
9572
|
+
const enriched = enrichGraph(
|
|
9573
|
+
{
|
|
9574
|
+
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
9575
|
+
nodes: graphNodes,
|
|
9576
|
+
edges,
|
|
9577
|
+
communities: [],
|
|
9578
|
+
sources: manifests,
|
|
9579
|
+
pages
|
|
9580
|
+
},
|
|
9581
|
+
manifests,
|
|
9582
|
+
analyses
|
|
9583
|
+
);
|
|
9584
|
+
const metrics = deriveGraphMetrics(graphNodes, enriched.edges);
|
|
8682
9585
|
return {
|
|
8683
9586
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
8684
9587
|
nodes: metrics.nodes,
|
|
8685
|
-
edges,
|
|
9588
|
+
edges: enriched.edges,
|
|
9589
|
+
hyperedges: enriched.hyperedges,
|
|
8686
9590
|
communities: metrics.communities,
|
|
8687
9591
|
sources: manifests,
|
|
8688
9592
|
pages
|
|
8689
9593
|
};
|
|
8690
9594
|
}
|
|
8691
|
-
|
|
9595
|
+
function recentResearchSourcePages(graph, previousCompiledAt) {
|
|
9596
|
+
const previousTimestamp = previousCompiledAt ? Date.parse(previousCompiledAt) : Number.NaN;
|
|
9597
|
+
return graph.pages.filter(
|
|
9598
|
+
(page) => page.kind === "source" && Boolean(page.sourceType) && page.sourceType !== "url"
|
|
9599
|
+
).filter((page) => Number.isNaN(previousTimestamp) || Date.parse(page.updatedAt) > previousTimestamp).sort((left, right) => right.updatedAt.localeCompare(left.updatedAt) || left.title.localeCompare(right.title)).slice(0, 8).map((page) => ({
|
|
9600
|
+
id: page.id,
|
|
9601
|
+
path: page.path,
|
|
9602
|
+
title: page.title,
|
|
9603
|
+
updatedAt: page.updatedAt,
|
|
9604
|
+
sourceType: page.sourceType
|
|
9605
|
+
}));
|
|
9606
|
+
}
|
|
9607
|
+
async function buildGraphOrientationPages(graph, paths, schemaHash, previousCompiledAt) {
|
|
8692
9608
|
const benchmark = await readJsonFile(paths.benchmarkPath);
|
|
8693
9609
|
const communityRecords = [];
|
|
8694
9610
|
for (const community of graph.communities ?? []) {
|
|
@@ -8698,7 +9614,7 @@ async function buildGraphOrientationPages(graph, paths, schemaHash) {
|
|
|
8698
9614
|
absolutePath,
|
|
8699
9615
|
{
|
|
8700
9616
|
managedBy: "system",
|
|
8701
|
-
compiledFrom:
|
|
9617
|
+
compiledFrom: uniqueStrings3(
|
|
8702
9618
|
community.nodeIds.flatMap((nodeId) => graph.nodes.find((node) => node.id === nodeId)?.sourceIds ?? [])
|
|
8703
9619
|
),
|
|
8704
9620
|
confidence: 1
|
|
@@ -8712,23 +9628,33 @@ async function buildGraphOrientationPages(graph, paths, schemaHash) {
|
|
|
8712
9628
|
)
|
|
8713
9629
|
);
|
|
8714
9630
|
}
|
|
9631
|
+
const report = buildGraphReportArtifact({
|
|
9632
|
+
graph,
|
|
9633
|
+
communityPages: communityRecords.map((record) => record.page),
|
|
9634
|
+
benchmark,
|
|
9635
|
+
benchmarkStale: benchmark ? benchmark.graphHash !== graphHash(graph) : false,
|
|
9636
|
+
recentResearchSources: recentResearchSourcePages(graph, previousCompiledAt),
|
|
9637
|
+
graphHash: graphHash(graph)
|
|
9638
|
+
});
|
|
8715
9639
|
const reportAbsolutePath = path18.join(paths.wikiDir, "graph", "report.md");
|
|
8716
9640
|
const reportRecord = await buildManagedGraphPage(
|
|
8717
9641
|
reportAbsolutePath,
|
|
8718
9642
|
{
|
|
8719
9643
|
managedBy: "system",
|
|
8720
|
-
compiledFrom:
|
|
9644
|
+
compiledFrom: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
|
|
8721
9645
|
confidence: 1
|
|
8722
9646
|
},
|
|
8723
9647
|
(metadata) => buildGraphReportPage({
|
|
8724
9648
|
graph,
|
|
8725
9649
|
schemaHash,
|
|
8726
9650
|
metadata,
|
|
8727
|
-
|
|
8728
|
-
benchmark
|
|
9651
|
+
report
|
|
8729
9652
|
})
|
|
8730
9653
|
);
|
|
8731
|
-
return
|
|
9654
|
+
return {
|
|
9655
|
+
records: [reportRecord, ...communityRecords],
|
|
9656
|
+
report
|
|
9657
|
+
};
|
|
8732
9658
|
}
|
|
8733
9659
|
async function writePage(wikiDir, relativePath, content, changedPages) {
|
|
8734
9660
|
const absolutePath = path18.resolve(wikiDir, relativePath);
|
|
@@ -9025,7 +9951,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9025
9951
|
const itemKind = kind === "concepts" ? "concept" : "entity";
|
|
9026
9952
|
const slug = slugify(aggregate.name);
|
|
9027
9953
|
const pageId = `${itemKind}:${slug}`;
|
|
9028
|
-
const sourceIds =
|
|
9954
|
+
const sourceIds = uniqueStrings3(aggregate.sourceAnalyses.map((item) => item.sourceId));
|
|
9029
9955
|
const projectIds = scopedProjectIdsFromSources(sourceIds, input.sourceProjects);
|
|
9030
9956
|
const schemaHash = effectiveHashForProject(input.schemas, projectIds[0] ?? null);
|
|
9031
9957
|
const previousEntry = input.previousState?.candidateHistory?.[pageId];
|
|
@@ -9091,9 +10017,9 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9091
10017
|
const compiledPages = records.map((record) => record.page);
|
|
9092
10018
|
const basePages = [...compiledPages, ...input.outputPages, ...input.insightPages];
|
|
9093
10019
|
const baseGraph = buildGraph(input.manifests, input.analyses, basePages, input.sourceProjects, input.codeIndex);
|
|
9094
|
-
const
|
|
9095
|
-
records.push(...
|
|
9096
|
-
const allPages = [...basePages, ...
|
|
10020
|
+
const graphOrientation = await buildGraphOrientationPages(baseGraph, paths, globalSchemaHash, input.previousState?.generatedAt);
|
|
10021
|
+
records.push(...graphOrientation.records);
|
|
10022
|
+
const allPages = [...basePages, ...graphOrientation.records.map((record) => record.page)];
|
|
9097
10023
|
const graph = {
|
|
9098
10024
|
...baseGraph,
|
|
9099
10025
|
pages: allPages
|
|
@@ -9226,7 +10152,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9226
10152
|
const nextPagePaths = new Set(records.map((record) => record.page.path));
|
|
9227
10153
|
const obsoleteGraphPaths = (previousGraph?.pages ?? []).filter((page) => page.kind !== "output" && page.kind !== "insight").map((page) => page.path).filter((relativePath) => !nextPagePaths.has(relativePath));
|
|
9228
10154
|
const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath))).filter((relativePath) => !nextPagePaths.has(relativePath));
|
|
9229
|
-
const obsoletePaths =
|
|
10155
|
+
const obsoletePaths = uniqueStrings3([...obsoleteGraphPaths, ...existingProjectIndexPaths]);
|
|
9230
10156
|
const changedFiles = [];
|
|
9231
10157
|
for (const record of records) {
|
|
9232
10158
|
const absolutePath = path18.join(paths.wikiDir, record.page.path);
|
|
@@ -9258,6 +10184,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9258
10184
|
await fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true });
|
|
9259
10185
|
}
|
|
9260
10186
|
await writeJsonFile(paths.graphPath, graph);
|
|
10187
|
+
await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
|
|
9261
10188
|
await writeJsonFile(paths.codeIndexPath, input.codeIndex);
|
|
9262
10189
|
await writeJsonFile(paths.compileStatePath, {
|
|
9263
10190
|
generatedAt: graph.generatedAt,
|
|
@@ -9283,7 +10210,7 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9283
10210
|
return {
|
|
9284
10211
|
graph,
|
|
9285
10212
|
allPages,
|
|
9286
|
-
changedPages:
|
|
10213
|
+
changedPages: uniqueStrings3([...changedPages, ...writeChanges]),
|
|
9287
10214
|
promotedPageIds,
|
|
9288
10215
|
candidatePageCount: candidatePages.length,
|
|
9289
10216
|
staged: false
|
|
@@ -9292,18 +10219,20 @@ async function syncVaultArtifacts(rootDir, input) {
|
|
|
9292
10219
|
async function refreshIndexesAndSearch(rootDir, pages) {
|
|
9293
10220
|
const { config, paths } = await loadVaultConfig(rootDir);
|
|
9294
10221
|
const schemas = await loadVaultSchemas(rootDir);
|
|
10222
|
+
const compileState = await readJsonFile(paths.compileStatePath);
|
|
9295
10223
|
const globalSchemaHash = schemas.effective.global.hash;
|
|
9296
10224
|
const currentGraph = await readJsonFile(paths.graphPath);
|
|
9297
10225
|
const basePages = pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary");
|
|
9298
|
-
const
|
|
10226
|
+
const graphOrientation = currentGraph ? await buildGraphOrientationPages(
|
|
9299
10227
|
{
|
|
9300
10228
|
...currentGraph,
|
|
9301
10229
|
pages: basePages
|
|
9302
10230
|
},
|
|
9303
10231
|
paths,
|
|
9304
|
-
globalSchemaHash
|
|
9305
|
-
|
|
9306
|
-
|
|
10232
|
+
globalSchemaHash,
|
|
10233
|
+
compileState?.generatedAt
|
|
10234
|
+
) : { records: [], report: null };
|
|
10235
|
+
const pagesWithGraph = sortGraphPages([...basePages, ...graphOrientation.records.map((record) => record.page)]);
|
|
9307
10236
|
if (currentGraph) {
|
|
9308
10237
|
await writeJsonFile(paths.graphPath, {
|
|
9309
10238
|
...currentGraph,
|
|
@@ -9409,9 +10338,12 @@ async function refreshIndexesAndSearch(rootDir, pages) {
|
|
|
9409
10338
|
)
|
|
9410
10339
|
);
|
|
9411
10340
|
}
|
|
9412
|
-
for (const record of
|
|
10341
|
+
for (const record of graphOrientation.records) {
|
|
9413
10342
|
await writeFileIfChanged(path18.join(paths.wikiDir, record.page.path), record.content);
|
|
9414
10343
|
}
|
|
10344
|
+
if (graphOrientation.report) {
|
|
10345
|
+
await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
|
|
10346
|
+
}
|
|
9415
10347
|
const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
|
|
9416
10348
|
const allowedProjectIndexPaths = /* @__PURE__ */ new Set([
|
|
9417
10349
|
"projects/index.md",
|
|
@@ -9421,7 +10353,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
|
|
|
9421
10353
|
existingProjectIndexPaths.filter((relativePath) => !allowedProjectIndexPaths.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
|
|
9422
10354
|
);
|
|
9423
10355
|
const existingGraphPages = (await listFilesRecursive(path18.join(paths.wikiDir, "graph").replace(/\/$/, "")).catch(() => [])).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
|
|
9424
|
-
const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...
|
|
10356
|
+
const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...graphOrientation.records.map((record) => record.page.path)]);
|
|
9425
10357
|
await Promise.all(
|
|
9426
10358
|
existingGraphPages.filter((relativePath) => !allowedGraphPages.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
|
|
9427
10359
|
);
|
|
@@ -9438,7 +10370,7 @@ async function prepareOutputPageSave(rootDir, input) {
|
|
|
9438
10370
|
status: "active",
|
|
9439
10371
|
createdAt: now,
|
|
9440
10372
|
updatedAt: now,
|
|
9441
|
-
compiledFrom:
|
|
10373
|
+
compiledFrom: uniqueStrings3(input.relatedSourceIds ?? input.citations),
|
|
9442
10374
|
managedBy: "system",
|
|
9443
10375
|
confidence: 0.74
|
|
9444
10376
|
}
|
|
@@ -9479,7 +10411,7 @@ async function prepareExploreHubSave(rootDir, input) {
|
|
|
9479
10411
|
status: "active",
|
|
9480
10412
|
createdAt: now,
|
|
9481
10413
|
updatedAt: now,
|
|
9482
|
-
compiledFrom:
|
|
10414
|
+
compiledFrom: uniqueStrings3(input.citations),
|
|
9483
10415
|
managedBy: "system",
|
|
9484
10416
|
confidence: 0.76
|
|
9485
10417
|
}
|
|
@@ -9542,6 +10474,7 @@ async function stageOutputApprovalBundle(rootDir, stagedPages) {
|
|
|
9542
10474
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
9543
10475
|
nodes: previousGraph?.nodes ?? [],
|
|
9544
10476
|
edges: previousGraph?.edges ?? [],
|
|
10477
|
+
hyperedges: previousGraph?.hyperedges ?? [],
|
|
9545
10478
|
sources: previousGraph?.sources ?? [],
|
|
9546
10479
|
pages: nextPages
|
|
9547
10480
|
};
|
|
@@ -9577,7 +10510,7 @@ async function executeQuery(rootDir, question, format) {
|
|
|
9577
10510
|
const absolutePath = path18.join(paths.wikiDir, result.path);
|
|
9578
10511
|
try {
|
|
9579
10512
|
const content = await fs15.readFile(absolutePath, "utf8");
|
|
9580
|
-
const parsed =
|
|
10513
|
+
const parsed = matter9(content);
|
|
9581
10514
|
return `# ${result.title}
|
|
9582
10515
|
${truncate(normalizeWhitespace(parsed.content), 1200)}`;
|
|
9583
10516
|
} catch {
|
|
@@ -9850,6 +10783,7 @@ async function acceptApproval(rootDir, approvalId, targets = []) {
|
|
|
9850
10783
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
9851
10784
|
nodes: currentGraph?.nodes ?? bundleGraph?.nodes ?? [],
|
|
9852
10785
|
edges: currentGraph?.edges ?? bundleGraph?.edges ?? [],
|
|
10786
|
+
hyperedges: currentGraph?.hyperedges ?? bundleGraph?.hyperedges ?? [],
|
|
9853
10787
|
sources: currentGraph?.sources ?? bundleGraph?.sources ?? [],
|
|
9854
10788
|
pages: sortGraphPages(nextPages)
|
|
9855
10789
|
};
|
|
@@ -9927,13 +10861,13 @@ async function promoteCandidate(rootDir, target) {
|
|
|
9927
10861
|
const graph = await readJsonFile(paths.graphPath);
|
|
9928
10862
|
const candidate = resolveCandidateTarget(graph?.pages ?? [], target);
|
|
9929
10863
|
const raw = await fs15.readFile(path18.join(paths.wikiDir, candidate.path), "utf8");
|
|
9930
|
-
const parsed =
|
|
10864
|
+
const parsed = matter9(raw);
|
|
9931
10865
|
const nextUpdatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
9932
|
-
const nextContent =
|
|
10866
|
+
const nextContent = matter9.stringify(parsed.content, {
|
|
9933
10867
|
...parsed.data,
|
|
9934
10868
|
status: "active",
|
|
9935
10869
|
updated_at: nextUpdatedAt,
|
|
9936
|
-
tags:
|
|
10870
|
+
tags: uniqueStrings3([candidate.kind, ...Array.isArray(parsed.data.tags) ? parsed.data.tags : []]).filter(
|
|
9937
10871
|
(tag) => tag !== "candidate"
|
|
9938
10872
|
)
|
|
9939
10873
|
});
|
|
@@ -9950,6 +10884,7 @@ async function promoteCandidate(rootDir, target) {
|
|
|
9950
10884
|
generatedAt: nextUpdatedAt,
|
|
9951
10885
|
nodes: graph?.nodes ?? [],
|
|
9952
10886
|
edges: graph?.edges ?? [],
|
|
10887
|
+
hyperedges: graph?.hyperedges ?? [],
|
|
9953
10888
|
sources: graph?.sources ?? [],
|
|
9954
10889
|
pages: nextPages
|
|
9955
10890
|
};
|
|
@@ -9991,6 +10926,7 @@ async function archiveCandidate(rootDir, target) {
|
|
|
9991
10926
|
generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
9992
10927
|
nodes: graph?.nodes ?? [],
|
|
9993
10928
|
edges: graph?.edges ?? [],
|
|
10929
|
+
hyperedges: graph?.hyperedges ?? [],
|
|
9994
10930
|
sources: graph?.sources ?? [],
|
|
9995
10931
|
pages: nextPages
|
|
9996
10932
|
};
|
|
@@ -10075,7 +11011,7 @@ async function initVault(rootDir, options = {}) {
|
|
|
10075
11011
|
const now = (/* @__PURE__ */ new Date()).toISOString();
|
|
10076
11012
|
await writeFileIfChanged(
|
|
10077
11013
|
insightsIndexPath,
|
|
10078
|
-
|
|
11014
|
+
matter9.stringify(
|
|
10079
11015
|
[
|
|
10080
11016
|
"# Insights",
|
|
10081
11017
|
"",
|
|
@@ -10108,7 +11044,7 @@ async function initVault(rootDir, options = {}) {
|
|
|
10108
11044
|
);
|
|
10109
11045
|
await writeFileIfChanged(
|
|
10110
11046
|
path18.join(paths.wikiDir, "projects", "index.md"),
|
|
10111
|
-
|
|
11047
|
+
matter9.stringify(["# Projects", "", "- Run `swarmvault compile` to build project rollups.", ""].join("\n"), {
|
|
10112
11048
|
page_id: "projects:index",
|
|
10113
11049
|
kind: "index",
|
|
10114
11050
|
title: "Projects",
|
|
@@ -10130,7 +11066,7 @@ async function initVault(rootDir, options = {}) {
|
|
|
10130
11066
|
);
|
|
10131
11067
|
await writeFileIfChanged(
|
|
10132
11068
|
path18.join(paths.wikiDir, "candidates", "index.md"),
|
|
10133
|
-
|
|
11069
|
+
matter9.stringify(["# Candidates", "", "- Run `swarmvault compile` to stage candidate pages.", ""].join("\n"), {
|
|
10134
11070
|
page_id: "candidates:index",
|
|
10135
11071
|
kind: "index",
|
|
10136
11072
|
title: "Candidates",
|
|
@@ -10154,6 +11090,20 @@ async function initVault(rootDir, options = {}) {
|
|
|
10154
11090
|
await ensureObsidianWorkspace(rootDir);
|
|
10155
11091
|
}
|
|
10156
11092
|
}
|
|
11093
|
+
async function runConfiguredBenchmark(rootDir, config) {
|
|
11094
|
+
if (config.benchmark?.enabled === false) {
|
|
11095
|
+
return { ok: true };
|
|
11096
|
+
}
|
|
11097
|
+
try {
|
|
11098
|
+
await benchmarkVault(rootDir);
|
|
11099
|
+
return { ok: true };
|
|
11100
|
+
} catch (error) {
|
|
11101
|
+
return {
|
|
11102
|
+
ok: false,
|
|
11103
|
+
error: error instanceof Error ? error.message : String(error)
|
|
11104
|
+
};
|
|
11105
|
+
}
|
|
11106
|
+
}
|
|
10157
11107
|
async function compileVault(rootDir, options = {}) {
|
|
10158
11108
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
10159
11109
|
const { config, paths } = await initWorkspace(rootDir);
|
|
@@ -10169,7 +11119,7 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10169
11119
|
const currentInsightHashes = pageHashes(storedInsightPages);
|
|
10170
11120
|
const previousState = await readJsonFile(paths.compileStatePath);
|
|
10171
11121
|
const rootSchemaChanged = !previousState || previousState.rootSchemaHash !== schemas.root.hash;
|
|
10172
|
-
const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash ||
|
|
11122
|
+
const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash || uniqueStrings3([...Object.keys(previousState?.effectiveSchemaHashes?.projects ?? {}), ...Object.keys(schemas.effective.projects)]).some(
|
|
10173
11123
|
(projectId) => previousProjectSchemaHash(previousState, projectId) !== effectiveHashForProject(schemas, projectId)
|
|
10174
11124
|
);
|
|
10175
11125
|
const nextProjectConfigHash = projectConfigHash(config);
|
|
@@ -10202,6 +11152,10 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10202
11152
|
}
|
|
10203
11153
|
if (dirty.length === 0 && !rootSchemaChanged && !effectiveSchemaChanged && !projectConfigChanged && !sourcesChanged && !outputsChanged && !insightsChanged && !pendingCandidatePromotion && artifactsExist && !options.approve) {
|
|
10204
11154
|
const graph = await readJsonFile(paths.graphPath);
|
|
11155
|
+
const benchmark2 = await runConfiguredBenchmark(rootDir, config);
|
|
11156
|
+
if (graph && benchmark2.ok) {
|
|
11157
|
+
await refreshIndexesAndSearch(rootDir, graph.pages);
|
|
11158
|
+
}
|
|
10205
11159
|
await recordSession(rootDir, {
|
|
10206
11160
|
operation: "compile",
|
|
10207
11161
|
title: `Compiled ${manifests.length} source(s)`,
|
|
@@ -10219,7 +11173,8 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10219
11173
|
`clean=${manifests.length}`,
|
|
10220
11174
|
`outputs=${outputPages.length}`,
|
|
10221
11175
|
`insights=${insightPages.length}`,
|
|
10222
|
-
`schema=${schemas.effective.global.hash.slice(0, 12)}
|
|
11176
|
+
`schema=${schemas.effective.global.hash.slice(0, 12)}`,
|
|
11177
|
+
`benchmark=${benchmark2.ok ? "ok" : `error:${benchmark2.error}`}`
|
|
10223
11178
|
]
|
|
10224
11179
|
});
|
|
10225
11180
|
return {
|
|
@@ -10337,6 +11292,10 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10337
11292
|
postPassApprovalDir = staged.approvalDir;
|
|
10338
11293
|
}
|
|
10339
11294
|
}
|
|
11295
|
+
const benchmark = options.approve ? { ok: true } : await runConfiguredBenchmark(rootDir, config);
|
|
11296
|
+
if (!options.approve && benchmark.ok) {
|
|
11297
|
+
await refreshIndexesAndSearch(rootDir, sync.allPages);
|
|
11298
|
+
}
|
|
10340
11299
|
await recordSession(rootDir, {
|
|
10341
11300
|
operation: "compile",
|
|
10342
11301
|
title: `Compiled ${manifests.length} source(s)`,
|
|
@@ -10358,7 +11317,8 @@ async function compileVault(rootDir, options = {}) {
|
|
|
10358
11317
|
`promoted=${sync.promotedPageIds.length}`,
|
|
10359
11318
|
`staged=${sync.staged}`,
|
|
10360
11319
|
`postPassApproval=${postPassApprovalId ?? "none"}`,
|
|
10361
|
-
`schema=${schemas.effective.global.hash.slice(0, 12)}
|
|
11320
|
+
`schema=${schemas.effective.global.hash.slice(0, 12)}`,
|
|
11321
|
+
`benchmark=${benchmark.ok ? "ok" : `error:${benchmark.error}`}`
|
|
10362
11322
|
]
|
|
10363
11323
|
});
|
|
10364
11324
|
return {
|
|
@@ -10628,7 +11588,7 @@ ${orchestrationNotes.join("\n")}
|
|
|
10628
11588
|
citations: allCitations,
|
|
10629
11589
|
format: outputFormat,
|
|
10630
11590
|
relatedPageCount: stepPages.length,
|
|
10631
|
-
relatedNodeCount:
|
|
11591
|
+
relatedNodeCount: uniqueStrings3(stepPages.flatMap((page) => page.nodeIds)).length,
|
|
10632
11592
|
projectId: stepPages[0]?.projectIds[0] ?? null
|
|
10633
11593
|
});
|
|
10634
11594
|
const hubInput = {
|
|
@@ -10638,7 +11598,7 @@ ${orchestrationNotes.join("\n")}
|
|
|
10638
11598
|
citations: allCitations,
|
|
10639
11599
|
schemaHash: composeVaultSchema(
|
|
10640
11600
|
schemas.root,
|
|
10641
|
-
|
|
11601
|
+
uniqueStrings3(stepPages.flatMap((page) => page.projectIds).sort((left, right) => left.localeCompare(right))).map((projectId) => schemas.projects[projectId]).filter((schema) => Boolean(schema?.hash))
|
|
10642
11602
|
).hash,
|
|
10643
11603
|
outputFormat,
|
|
10644
11604
|
outputAssets: hubAssetBundle.outputAssets,
|
|
@@ -10698,7 +11658,7 @@ ${orchestrationNotes.join("\n")}
|
|
|
10698
11658
|
providerId: provider.id,
|
|
10699
11659
|
success: true,
|
|
10700
11660
|
relatedSourceIds: [...relatedSourceIds],
|
|
10701
|
-
relatedPageIds:
|
|
11661
|
+
relatedPageIds: uniqueStrings3([...relatedPageIds, ...stepPages.map((page) => page.id), hubPage.id]),
|
|
10702
11662
|
relatedNodeIds: [...relatedNodeIds],
|
|
10703
11663
|
citations: allCitations,
|
|
10704
11664
|
tokenUsage: tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0 ? {
|
|
@@ -10753,7 +11713,7 @@ async function queryGraphVault(rootDir, question, options = {}) {
|
|
|
10753
11713
|
return queryGraph(graph, question, searchResults, options);
|
|
10754
11714
|
}
|
|
10755
11715
|
async function benchmarkVault(rootDir, options = {}) {
|
|
10756
|
-
const { paths } = await loadVaultConfig(rootDir);
|
|
11716
|
+
const { config, paths } = await loadVaultConfig(rootDir);
|
|
10757
11717
|
const graph = await ensureCompiledGraph(rootDir);
|
|
10758
11718
|
const manifests = await listManifests(rootDir);
|
|
10759
11719
|
const pageContentsById = /* @__PURE__ */ new Map();
|
|
@@ -10769,11 +11729,13 @@ async function benchmarkVault(rootDir, options = {}) {
|
|
|
10769
11729
|
if (!await fileExists(absolutePath)) {
|
|
10770
11730
|
continue;
|
|
10771
11731
|
}
|
|
10772
|
-
const parsed =
|
|
11732
|
+
const parsed = matter9(await fs15.readFile(absolutePath, "utf8"));
|
|
10773
11733
|
pageContentsById.set(page.id, parsed.content);
|
|
10774
11734
|
}
|
|
11735
|
+
const configuredQuestions = (config.benchmark?.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
|
|
11736
|
+
const maxQuestions = Math.max(1, options.maxQuestions ?? config.benchmark?.maxQuestions ?? 3);
|
|
10775
11737
|
const questions = (options.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
|
|
10776
|
-
const sampleQuestions = questions.length ? questions :
|
|
11738
|
+
const sampleQuestions = (questions.length ? questions : configuredQuestions.length ? configuredQuestions : defaultBenchmarkQuestionsForGraph(graph, maxQuestions)).slice(0, maxQuestions);
|
|
10777
11739
|
const perQuestion = sampleQuestions.map((question) => {
|
|
10778
11740
|
const searchResults = searchPages(paths.searchDbPath, question, { limit: 12 });
|
|
10779
11741
|
const result = queryGraph(graph, question, searchResults, { budget: 12 });
|
|
@@ -10783,6 +11745,7 @@ async function benchmarkVault(rootDir, options = {}) {
|
|
|
10783
11745
|
queryTokens: metrics.queryTokens,
|
|
10784
11746
|
reduction: metrics.reduction,
|
|
10785
11747
|
visitedNodeIds: result.visitedNodeIds,
|
|
11748
|
+
visitedEdgeIds: result.visitedEdgeIds,
|
|
10786
11749
|
pageIds: result.pageIds
|
|
10787
11750
|
};
|
|
10788
11751
|
});
|
|
@@ -10804,6 +11767,14 @@ async function explainGraphVault(rootDir, target) {
|
|
|
10804
11767
|
const graph = await ensureCompiledGraph(rootDir);
|
|
10805
11768
|
return explainGraphTarget(graph, target);
|
|
10806
11769
|
}
|
|
11770
|
+
async function listGraphHyperedges(rootDir, target, limit = 25) {
|
|
11771
|
+
const graph = await ensureCompiledGraph(rootDir);
|
|
11772
|
+
return listHyperedges(graph, target, limit);
|
|
11773
|
+
}
|
|
11774
|
+
async function readGraphReport(rootDir) {
|
|
11775
|
+
const { paths } = await loadVaultConfig(rootDir);
|
|
11776
|
+
return readJsonFile(path18.join(paths.wikiDir, "graph", "report.json"));
|
|
11777
|
+
}
|
|
10807
11778
|
async function listGodNodes(rootDir, limit = 10) {
|
|
10808
11779
|
const graph = await ensureCompiledGraph(rootDir);
|
|
10809
11780
|
return topGodNodes(graph, limit);
|
|
@@ -10820,7 +11791,7 @@ async function readPage(rootDir, relativePath) {
|
|
|
10820
11791
|
return null;
|
|
10821
11792
|
}
|
|
10822
11793
|
const raw = await fs15.readFile(absolutePath, "utf8");
|
|
10823
|
-
const parsed =
|
|
11794
|
+
const parsed = matter9(raw);
|
|
10824
11795
|
return {
|
|
10825
11796
|
path: relativePath,
|
|
10826
11797
|
title: typeof parsed.data.title === "string" ? parsed.data.title : path18.basename(relativePath, path18.extname(relativePath)),
|
|
@@ -10947,7 +11918,7 @@ async function lintVault(rootDir, options = {}) {
|
|
|
10947
11918
|
providerId: provider?.id,
|
|
10948
11919
|
success: true,
|
|
10949
11920
|
relatedPageIds: graph.pages.map((page) => page.id),
|
|
10950
|
-
relatedSourceIds:
|
|
11921
|
+
relatedSourceIds: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
|
|
10951
11922
|
lintFindingCount: findings.length,
|
|
10952
11923
|
lines: [`findings=${findings.length}`, `deep=${Boolean(options.deep)}`, `web=${Boolean(options.web)}`]
|
|
10953
11924
|
});
|
|
@@ -10967,7 +11938,7 @@ async function bootstrapDemo(rootDir, input) {
|
|
|
10967
11938
|
}
|
|
10968
11939
|
|
|
10969
11940
|
// src/mcp.ts
|
|
10970
|
-
var SERVER_VERSION = "0.1.
|
|
11941
|
+
var SERVER_VERSION = "0.1.24";
|
|
10971
11942
|
async function createMcpServer(rootDir) {
|
|
10972
11943
|
const server = new McpServer({
|
|
10973
11944
|
name: "swarmvault",
|
|
@@ -11045,10 +12016,19 @@ async function createMcpServer(rootDir) {
|
|
|
11045
12016
|
return asToolText(result);
|
|
11046
12017
|
}
|
|
11047
12018
|
);
|
|
12019
|
+
server.registerTool(
|
|
12020
|
+
"graph_report",
|
|
12021
|
+
{
|
|
12022
|
+
description: "Return the machine-readable graph report and trust artifact."
|
|
12023
|
+
},
|
|
12024
|
+
async () => {
|
|
12025
|
+
return asToolText(await readGraphReport(rootDir) ?? { error: "Graph report not found. Run `swarmvault compile` first." });
|
|
12026
|
+
}
|
|
12027
|
+
);
|
|
11048
12028
|
server.registerTool(
|
|
11049
12029
|
"get_node",
|
|
11050
12030
|
{
|
|
11051
|
-
description: "Explain a graph node, its page, community, and
|
|
12031
|
+
description: "Explain a graph node, its page, community, neighbors, and group patterns.",
|
|
11052
12032
|
inputSchema: {
|
|
11053
12033
|
target: z8.string().min(1).describe("Node or page label/id")
|
|
11054
12034
|
}
|
|
@@ -11057,6 +12037,19 @@ async function createMcpServer(rootDir) {
|
|
|
11057
12037
|
return asToolText(await explainGraphVault(rootDir, target));
|
|
11058
12038
|
}
|
|
11059
12039
|
);
|
|
12040
|
+
server.registerTool(
|
|
12041
|
+
"get_hyperedges",
|
|
12042
|
+
{
|
|
12043
|
+
description: "List graph hyperedges, optionally filtered to a node or page target.",
|
|
12044
|
+
inputSchema: {
|
|
12045
|
+
target: z8.string().optional().describe("Optional node/page label or id to filter by"),
|
|
12046
|
+
limit: z8.number().int().min(1).max(50).optional().describe("Maximum hyperedges to return")
|
|
12047
|
+
}
|
|
12048
|
+
},
|
|
12049
|
+
async ({ target, limit }) => {
|
|
12050
|
+
return asToolText(await listGraphHyperedges(rootDir, target, limit ?? 25));
|
|
12051
|
+
}
|
|
12052
|
+
);
|
|
11060
12053
|
server.registerTool(
|
|
11061
12054
|
"get_neighbors",
|
|
11062
12055
|
{
|
|
@@ -11598,7 +12591,7 @@ import fs18 from "fs/promises";
|
|
|
11598
12591
|
import http from "http";
|
|
11599
12592
|
import path22 from "path";
|
|
11600
12593
|
import { promisify } from "util";
|
|
11601
|
-
import
|
|
12594
|
+
import matter10 from "gray-matter";
|
|
11602
12595
|
import mime2 from "mime-types";
|
|
11603
12596
|
|
|
11604
12597
|
// src/watch.ts
|
|
@@ -11999,7 +12992,7 @@ async function readViewerPage(rootDir, relativePath) {
|
|
|
11999
12992
|
return null;
|
|
12000
12993
|
}
|
|
12001
12994
|
const raw = await fs18.readFile(absolutePath, "utf8");
|
|
12002
|
-
const parsed =
|
|
12995
|
+
const parsed = matter10(raw);
|
|
12003
12996
|
return {
|
|
12004
12997
|
path: relativePath,
|
|
12005
12998
|
title: typeof parsed.data.title === "string" ? parsed.data.title : path22.basename(relativePath, path22.extname(relativePath)),
|
|
@@ -12102,16 +13095,29 @@ async function startGraphServer(rootDir, port) {
|
|
|
12102
13095
|
const kind = url.searchParams.get("kind") ?? "all";
|
|
12103
13096
|
const status = url.searchParams.get("status") ?? "all";
|
|
12104
13097
|
const project = url.searchParams.get("project") ?? "all";
|
|
13098
|
+
const sourceType = url.searchParams.get("sourceType") ?? "all";
|
|
12105
13099
|
const results = searchPages(paths.searchDbPath, query, {
|
|
12106
13100
|
limit: Number.isFinite(limit) ? limit : 10,
|
|
12107
13101
|
kind,
|
|
12108
13102
|
status,
|
|
12109
|
-
project
|
|
13103
|
+
project,
|
|
13104
|
+
sourceType
|
|
12110
13105
|
});
|
|
12111
13106
|
response.writeHead(200, { "content-type": "application/json" });
|
|
12112
13107
|
response.end(JSON.stringify(results));
|
|
12113
13108
|
return;
|
|
12114
13109
|
}
|
|
13110
|
+
if (url.pathname === "/api/graph-report") {
|
|
13111
|
+
const reportPath = path22.join(paths.wikiDir, "graph", "report.json");
|
|
13112
|
+
if (!await fileExists(reportPath)) {
|
|
13113
|
+
response.writeHead(404, { "content-type": "application/json" });
|
|
13114
|
+
response.end(JSON.stringify({ error: "Graph report artifact not found. Run `swarmvault compile` first." }));
|
|
13115
|
+
return;
|
|
13116
|
+
}
|
|
13117
|
+
response.writeHead(200, { "content-type": "application/json" });
|
|
13118
|
+
response.end(await fs18.readFile(reportPath, "utf8"));
|
|
13119
|
+
return;
|
|
13120
|
+
}
|
|
12115
13121
|
if (url.pathname === "/api/watch-status") {
|
|
12116
13122
|
response.writeHead(200, { "content-type": "application/json" });
|
|
12117
13123
|
response.end(JSON.stringify(await getWatchStatus(rootDir)));
|
|
@@ -12241,6 +13247,7 @@ async function exportGraphHtml(rootDir, outputPath) {
|
|
|
12241
13247
|
title: loaded.title,
|
|
12242
13248
|
kind: page.kind,
|
|
12243
13249
|
status: page.status,
|
|
13250
|
+
sourceType: page.sourceType,
|
|
12244
13251
|
projectIds: page.projectIds,
|
|
12245
13252
|
content: loaded.content,
|
|
12246
13253
|
assets: await Promise.all(
|
|
@@ -12262,7 +13269,8 @@ async function exportGraphHtml(rootDir, outputPath) {
|
|
|
12262
13269
|
}
|
|
12263
13270
|
const script = await fs18.readFile(scriptPath, "utf8");
|
|
12264
13271
|
const style = stylePath && await fileExists(stylePath) ? await fs18.readFile(stylePath, "utf8") : "";
|
|
12265
|
-
const
|
|
13272
|
+
const report = await readJsonFile(path22.join(paths.wikiDir, "graph", "report.json"));
|
|
13273
|
+
const embeddedData = JSON.stringify({ graph, pages: pages.filter(Boolean), report }, null, 2).replace(/</g, "\\u003c");
|
|
12266
13274
|
const html = [
|
|
12267
13275
|
"<!doctype html>",
|
|
12268
13276
|
'<html lang="en">',
|
|
@@ -12318,6 +13326,7 @@ export {
|
|
|
12318
13326
|
listApprovals,
|
|
12319
13327
|
listCandidates,
|
|
12320
13328
|
listGodNodes,
|
|
13329
|
+
listGraphHyperedges,
|
|
12321
13330
|
listManifests,
|
|
12322
13331
|
listPages,
|
|
12323
13332
|
listSchedules,
|
|
@@ -12331,6 +13340,7 @@ export {
|
|
|
12331
13340
|
queryVault,
|
|
12332
13341
|
readApproval,
|
|
12333
13342
|
readExtractedText,
|
|
13343
|
+
readGraphReport,
|
|
12334
13344
|
readPage,
|
|
12335
13345
|
rejectApproval,
|
|
12336
13346
|
resolvePaths,
|