@swarmvaultai/engine 0.1.22 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -21,7 +21,7 @@ import {
21
21
  uniqueBy,
22
22
  writeFileIfChanged,
23
23
  writeJsonFile
24
- } from "./chunk-QMW7OISM.js";
24
+ } from "./chunk-6UPHDGEB.js";
25
25
 
26
26
  // src/agents.ts
27
27
  import fs from "fs/promises";
@@ -192,6 +192,9 @@ function graphPageById(graph) {
192
192
  function graphNodeById(graph) {
193
193
  return new Map(graph.nodes.map((node) => [node.id, node]));
194
194
  }
195
/**
 * Build the synthetic node id used for a hyperedge in exports.
 *
 * @param {{ id: * }} hyperedge - Hyperedge whose `id` is embedded in the result.
 * @returns {string} The hyperedge id prefixed with `hyperedge:`.
 */
function exportHyperedgeNodeId(hyperedge) {
  const { id } = hyperedge;
  return `hyperedge:${id}`;
}
195
198
  function sortedCommunities(graph) {
196
199
  const known = (graph.communities ?? []).map((community) => ({
197
200
  ...community,
@@ -356,6 +359,11 @@ function renderGraphMl(graph) {
356
359
  { id: "n_community", for: "node", name: "communityId", type: "string" },
357
360
  { id: "n_degree", for: "node", name: "degree", type: "double" },
358
361
  { id: "n_bridge", for: "node", name: "bridgeScore", type: "double" },
362
+ { id: "n_relation", for: "node", name: "relation", type: "string" },
363
+ { id: "n_evidence", for: "node", name: "evidenceClass", type: "string" },
364
+ { id: "n_confidence", for: "node", name: "confidence", type: "double" },
365
+ { id: "n_source_pages", for: "node", name: "sourcePageIds", type: "string" },
366
+ { id: "n_why", for: "node", name: "why", type: "string" },
359
367
  { id: "e_relation", for: "edge", name: "relation", type: "string" },
360
368
  { id: "e_status", for: "edge", name: "status", type: "string" },
361
369
  { id: "e_evidence", for: "edge", name: "evidenceClass", type: "string" },
@@ -394,6 +402,21 @@ function renderGraphMl(graph) {
394
402
  }
395
403
  lines.push(" </node>");
396
404
  }
405
+ for (const hyperedge of [...graph.hyperedges ?? []].sort((left, right) => left.id.localeCompare(right.id))) {
406
+ lines.push(` <node id="${xmlEscape(exportHyperedgeNodeId(hyperedge))}">`);
407
+ for (const [key, value] of [
408
+ ["n_label", hyperedge.label],
409
+ ["n_type", "hyperedge"],
410
+ ["n_relation", hyperedge.relation],
411
+ ["n_evidence", hyperedge.evidenceClass],
412
+ ["n_confidence", hyperedge.confidence],
413
+ ["n_source_pages", hyperedge.sourcePageIds],
414
+ ["n_why", hyperedge.why]
415
+ ]) {
416
+ lines.push(` <data key="${key}">${xmlEscape(graphMlData(value))}</data>`);
417
+ }
418
+ lines.push(" </node>");
419
+ }
397
420
  for (const edge of [...graph.edges].sort((left, right) => left.id.localeCompare(right.id))) {
398
421
  lines.push(` <edge id="${xmlEscape(edge.id)}" source="${xmlEscape(edge.source)}" target="${xmlEscape(edge.target)}">`);
399
422
  for (const [key, value] of [
@@ -407,6 +430,23 @@ function renderGraphMl(graph) {
407
430
  }
408
431
  lines.push(" </edge>");
409
432
  }
433
+ for (const hyperedge of [...graph.hyperedges ?? []].sort((left, right) => left.id.localeCompare(right.id))) {
434
+ for (const nodeId of hyperedge.nodeIds) {
435
+ lines.push(
436
+ ` <edge id="${xmlEscape(`member:${hyperedge.id}:${nodeId}`)}" source="${xmlEscape(exportHyperedgeNodeId(hyperedge))}" target="${xmlEscape(nodeId)}">`
437
+ );
438
+ for (const [key, value] of [
439
+ ["e_relation", "group_member"],
440
+ ["e_status", "inferred"],
441
+ ["e_evidence", hyperedge.evidenceClass],
442
+ ["e_confidence", hyperedge.confidence],
443
+ ["e_provenance", hyperedge.sourcePageIds]
444
+ ]) {
445
+ lines.push(` <data key="${key}">${xmlEscape(graphMlData(value))}</data>`);
446
+ }
447
+ lines.push(" </edge>");
448
+ }
449
+ }
410
450
  lines.push(" </graph>", "</graphml>", "");
411
451
  return lines.join("\n");
412
452
  }
@@ -433,13 +473,41 @@ function renderCypher(graph) {
433
473
  lines.push(`MERGE (n:SwarmNode {id: '${cypherEscape(node.id)}'}) SET n += { ${props} };`);
434
474
  }
435
475
  lines.push("");
476
+ for (const hyperedge of [...graph.hyperedges ?? []].sort((left, right) => left.id.localeCompare(right.id))) {
477
+ const hyperedgeNodeId = exportHyperedgeNodeId(hyperedge);
478
+ lines.push(
479
+ `MERGE (h:SwarmNode {id: '${cypherEscape(hyperedgeNodeId)}'}) SET h += { id: '${cypherEscape(hyperedgeNodeId)}', label: '${cypherEscape(
480
+ hyperedge.label
481
+ )}', type: 'hyperedge', relation: '${cypherEscape(hyperedge.relation)}', evidenceClass: '${cypherEscape(
482
+ hyperedge.evidenceClass
483
+ )}', confidence: ${hyperedge.confidence}, sourcePageIds: '${cypherEscape(JSON.stringify(hyperedge.sourcePageIds))}', why: '${cypherEscape(
484
+ hyperedge.why
485
+ )}' };`
486
+ );
487
+ }
488
+ if ((graph.hyperedges ?? []).length) {
489
+ lines.push("");
490
+ }
491
+ for (const hyperedge of [...graph.hyperedges ?? []].sort((left, right) => left.id.localeCompare(right.id))) {
492
+ const hyperedgeNodeId = exportHyperedgeNodeId(hyperedge);
493
+ for (const nodeId of hyperedge.nodeIds) {
494
+ lines.push(
495
+ `MATCH (h:SwarmNode {id: '${cypherEscape(hyperedgeNodeId)}'}), (n:SwarmNode {id: '${cypherEscape(nodeId)}'})`,
496
+ `MERGE (h)-[r:GROUP_MEMBER {id: '${cypherEscape(`member:${hyperedge.id}:${nodeId}`)}'}]->(n)`,
497
+ `SET r += { relation: 'group_member', status: 'inferred', evidenceClass: '${cypherEscape(
498
+ hyperedge.evidenceClass
499
+ )}', confidence: ${hyperedge.confidence}, provenance: '${cypherEscape(JSON.stringify(hyperedge.sourcePageIds))}' };`
500
+ );
501
+ }
502
+ }
503
+ lines.push("");
436
504
  for (const edge of [...graph.edges].sort((left, right) => left.id.localeCompare(right.id))) {
437
505
  lines.push(
438
506
  `MATCH (a:SwarmNode {id: '${cypherEscape(edge.source)}'}), (b:SwarmNode {id: '${cypherEscape(edge.target)}'})`,
439
507
  `MERGE (a)-[r:${relationType(edge.relation)} {id: '${cypherEscape(edge.id)}'}]->(b)`,
440
508
  `SET r += { relation: '${cypherEscape(edge.relation)}', status: '${cypherEscape(edge.status)}', evidenceClass: '${cypherEscape(
441
509
  edge.evidenceClass
442
- )}', confidence: ${edge.confidence}, provenance: '${cypherEscape(JSON.stringify(edge.provenance))}' };`
510
+ )}', confidence: ${edge.confidence}, provenance: '${cypherEscape(JSON.stringify(edge.provenance))}'${edge.similarityReasons?.length ? `, similarityReasons: '${cypherEscape(JSON.stringify(edge.similarityReasons))}'` : ""} };`
443
511
  );
444
512
  }
445
513
  lines.push("");
@@ -596,6 +664,7 @@ async function uninstallGitHooks(rootDir) {
596
664
  import fs9 from "fs/promises";
597
665
  import path9 from "path";
598
666
  import { Readability } from "@mozilla/readability";
667
+ import matter3 from "gray-matter";
599
668
  import ignore from "ignore";
600
669
  import { JSDOM } from "jsdom";
601
670
  import mime from "mime-types";
@@ -3204,6 +3273,9 @@ async function markPagesStaleForSources(rootDir, sourceIds) {
3204
3273
  var DEFAULT_MAX_ASSET_SIZE = 10 * 1024 * 1024;
3205
3274
  var DEFAULT_MAX_DIRECTORY_FILES = 5e3;
3206
3275
  var BUILT_IN_REPO_IGNORES = /* @__PURE__ */ new Set([".git", "node_modules", "dist", "build", ".next", "coverage", ".venv", "vendor", "target"]);
3276
/**
 * Drop falsy entries and duplicates from an array, keeping first-seen order.
 *
 * @param {Array<*>} values - Candidate values; falsy ones are discarded.
 * @returns {Array<*>} The distinct truthy values in order of first appearance.
 */
function uniqueStrings(values) {
  const seen = new Set();
  values.forEach((value) => {
    if (value) {
      seen.add(value);
    }
  });
  return Array.from(seen);
}
3207
3279
  function inferKind(mimeType, filePath) {
3208
3280
  if (inferCodeLanguage(filePath, mimeType)) {
3209
3281
  return "code";
@@ -3321,6 +3393,22 @@ function arxivIdFromInput(input) {
3321
3393
  return null;
3322
3394
  }
3323
3395
  }
3396
/**
 * Extract a DOI from free-form user input.
 *
 * Recognised forms:
 *  - a bare DOI such as `10.1000/182`;
 *  - a bare DOI carrying the conventional `doi:` label (any letter case,
 *    optional whitespace after the colon), e.g. `doi:10.1000/182`;
 *  - a resolver URL whose hostname is `doi.org` or `dx.doi.org`; the URL path
 *    is percent-decoded before being checked.
 *
 * @param {string} input - Raw input; surrounding whitespace is ignored.
 * @returns {string | null} The DOI without label or resolver prefix, or
 *   `null` when the input is not recognisable as a DOI.
 */
function doiFromInput(input) {
  // Shape check only: "10.", a whitespace-free registrant part, "/", and a
  // whitespace-free suffix.
  const doiShape = /^10\.\S+\/\S+$/i;
  const trimmed = input.trim();
  // Accept the `doi:` label form in addition to the bare identifier.
  const unlabeled = trimmed.replace(/^doi:\s*/i, "");
  if (doiShape.test(unlabeled)) {
    return unlabeled;
  }
  try {
    const url = new URL(trimmed);
    if (url.hostname === "doi.org" || url.hostname === "dx.doi.org") {
      const doi = decodeURIComponent(url.pathname.replace(/^\/+/, ""));
      return doiShape.test(doi) ? doi : null;
    }
  } catch {
    // Either the input is not a URL at all, or its path carries malformed
    // percent-encoding; in both cases there is no DOI to report.
    return null;
  }
  return null;
}
3324
3412
  function isTweetUrl(input) {
3325
3413
  try {
3326
3414
  const url = new URL(input);
@@ -3330,26 +3418,25 @@ function isTweetUrl(input) {
3330
3418
  }
3331
3419
  }
3332
3420
  function markdownFrontmatter(value) {
3333
- const lines = ["---"];
3334
- for (const [key, rawValue] of Object.entries(value)) {
3335
- if (!rawValue) {
3336
- continue;
3337
- }
3338
- lines.push(`${key}: "${rawValue.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`);
3339
- }
3340
- lines.push("---", "");
3341
- return lines;
3421
+ const normalized = Object.fromEntries(
3422
+ Object.entries(value).filter(
3423
+ ([, rawValue]) => Array.isArray(rawValue) ? rawValue.length > 0 : Boolean(typeof rawValue === "string" ? rawValue.trim() : rawValue)
3424
+ )
3425
+ );
3426
+ return matter3.stringify("", normalized).trimEnd().split("\n").concat([""]);
3342
3427
  }
3343
3428
  function prepareCapturedMarkdownInput(input) {
3344
3429
  return {
3345
3430
  title: input.title,
3346
3431
  originType: "url",
3347
3432
  sourceKind: "markdown",
3433
+ sourceType: input.sourceType,
3348
3434
  url: normalizeOriginUrl(input.url),
3349
3435
  mimeType: "text/markdown",
3350
3436
  storedExtension: ".md",
3351
3437
  payloadBytes: Buffer.from(input.markdown, "utf8"),
3352
3438
  extractedText: input.markdown,
3439
+ attachments: input.attachments,
3353
3440
  logDetails: input.logDetails
3354
3441
  };
3355
3442
  }
@@ -3360,6 +3447,17 @@ async function fetchText(url) {
3360
3447
  }
3361
3448
  return response.text();
3362
3449
  }
3450
/**
 * Fetch a URL and return its body text with the response's URL and media type.
 *
 * @param {string} url - Address to request with `fetch`.
 * @returns {Promise<{ text: string, finalUrl: *, contentType: string }>}
 *   `text` is the response body; `finalUrl` is `response.url` (or `url` when
 *   that is empty) passed through `normalizeOriginUrl`; `contentType` is the
 *   `content-type` header up to the first `;`, trimmed, defaulting to
 *   `"text/html"` when missing or empty.
 * @throws {Error} When the response status is not OK.
 */
async function fetchResolvedText(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
  }
  const text = await response.text();
  const finalUrl = normalizeOriginUrl(response.url || url);
  const contentTypeHeader = response.headers.get("content-type");
  const mediaType = contentTypeHeader?.split(";")[0]?.trim();
  return {
    text,
    finalUrl,
    contentType: mediaType || "text/html"
  };
}
3363
3461
  function domTextFromHtml(html, baseUrl) {
3364
3462
  const dom = new JSDOM(`<body>${html}</body>`, { url: baseUrl });
3365
3463
  return normalizeWhitespace(dom.window.document.body.textContent ?? "");
@@ -3379,11 +3477,16 @@ async function captureArxivMarkdown(input, options) {
3379
3477
  const authors = [...document.querySelectorAll('meta[name="citation_author"]')].map((node) => node.getAttribute("content")?.trim()).filter((value) => Boolean(value));
3380
3478
  const authorsText = authors.join(", ") || stripLeadingLabel(document.querySelector(".authors")?.textContent?.trim() ?? "", "Authors:");
3381
3479
  const abstract = stripLeadingLabel(document.querySelector("blockquote.abstract")?.textContent?.trim() ?? "", "Abstract:");
3480
+ const categories = [...document.querySelectorAll(".subheader .primary-subject, .metatable .tablecell.subjects")].flatMap((node) => (node.textContent ?? "").split(/;/g)).map((value) => value.trim()).filter(Boolean);
3382
3481
  const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
3383
3482
  const markdown = [
3384
3483
  ...markdownFrontmatter({
3385
- capture_type: "arxiv",
3484
+ source_type: "arxiv",
3386
3485
  source_url: normalizedUrl,
3486
+ canonical_url: normalizedUrl,
3487
+ title,
3488
+ authors,
3489
+ tags: uniqueStrings(categories),
3387
3490
  arxiv_id: arxivId,
3388
3491
  author: options.author,
3389
3492
  contributor: options.contributor,
@@ -3423,8 +3526,11 @@ async function captureTweetMarkdown(input, options) {
3423
3526
  const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
3424
3527
  const markdown = [
3425
3528
  ...markdownFrontmatter({
3426
- capture_type: "tweet",
3529
+ source_type: "tweet",
3427
3530
  source_url: normalizedUrl,
3531
+ canonical_url: canonicalUrl,
3532
+ title,
3533
+ authors: postAuthor ? [postAuthor] : void 0,
3428
3534
  author: options.author,
3429
3535
  contributor: options.contributor,
3430
3536
  captured_at: capturedAt
@@ -3446,6 +3552,101 @@ async function captureTweetMarkdown(input, options) {
3446
3552
  ].join("\n");
3447
3553
  return { title, normalizedUrl, markdown };
3448
3554
  }
3555
/**
 * Return the first non-empty `content` attribute found by trying selectors in
 * order.
 *
 * Each selector is looked up with `document.querySelector`; the matched
 * element's `content` attribute is trimmed, and the first non-empty result is
 * returned. Later selectors are not queried once a value is found.
 *
 * @param {{ querySelector: Function }} document - DOM-like document.
 * @param {string[]} selectors - CSS selectors, highest priority first.
 * @returns {string | undefined} Trimmed content, or `undefined` if none match.
 */
function firstMetaContent(document, selectors) {
  for (const selector of selectors) {
    const element = document.querySelector(selector);
    const content = element?.getAttribute("content")?.trim();
    if (!content) {
      continue;
    }
    return content;
  }
  return undefined;
}
3564
/**
 * Collect the non-empty `content` attributes of every element matched by any
 * of the selectors.
 *
 * Selectors are processed in the given order, and matches within a selector
 * in the order `document.querySelectorAll` yields them. Values are trimmed and
 * the combined list is passed through `uniqueStrings`.
 *
 * @param {{ querySelectorAll: Function }} document - DOM-like document.
 * @param {string[]} selectors - CSS selectors to query.
 * @returns {string[]} Result of `uniqueStrings` over the collected values.
 */
function metaContents(document, selectors) {
  const collected = [];
  for (const selector of selectors) {
    for (const node of document.querySelectorAll(selector)) {
      const content = node.getAttribute("content")?.trim() ?? "";
      if (content) {
        collected.push(content);
      }
    }
  }
  return uniqueStrings(collected);
}
3571
/**
 * Split a keyword string on `;` and `,` into trimmed, non-empty items.
 *
 * @param {string | null | undefined} value - Delimited keyword list; a
 *   nullish value is treated as the empty string.
 * @returns {string[]} Result of `uniqueStrings` over the trimmed pieces.
 */
function splitKeywords(value) {
  const keywords = [];
  for (const piece of (value ?? "").split(/[;,]/g)) {
    const keyword = piece.trim();
    if (keyword) {
      keywords.push(keyword);
    }
  }
  return uniqueStrings(keywords);
}
3576
/**
 * Capture a web article as markdown with a metadata front-matter block.
 *
 * Steps:
 *  1. Fetch `input` via `fetchResolvedText` and reject non-HTML content types.
 *  2. Parse the HTML with JSDOM and read metadata from `<link rel="canonical">`
 *     and `<meta>` tags (title, authors, dates, tags, DOI).
 *  3. Obtain the article body by running `prepareUrlInput` on the canonical
 *     URL (this performs its own fetch of that URL).
 *  4. Assemble front matter, body, and a trailing "## Source" section.
 *
 * A DOI read from page metadata is only used when `doiFromInput` recognises
 * it. `dc.identifier` in particular may hold a URL, ISBN or other identifier
 * that must not be recorded as a DOI. A DOI supplied through `extra.doi` is
 * used as-is.
 *
 * @param {string} rootDir - Workspace root, forwarded to `prepareUrlInput`.
 * @param {string} input - URL to fetch.
 * @param {object} options - Ingest options; `author` and `contributor` are
 *   copied into the front matter, and the normalised options are forwarded to
 *   `prepareUrlInput`.
 * @param {{ sourceType: string, sourceUrl?: string, doi?: string }} [extra]
 *   - `sourceType` is written as `source_type`; `sourceUrl` overrides the
 *   recorded `source_url` (defaults to `input`); `doi` overrides any DOI found
 *   in the page metadata.
 * @returns {Promise<{ title: string, normalizedUrl: string, markdown: string, attachments: * }>}
 * @throws {Error} When the fetched content type does not contain "html", or
 *   when `prepareUrlInput` yields a source kind other than `markdown`/`text`.
 */
async function captureArticleMarkdown(rootDir, input, options, extra = { sourceType: "article" }) {
  const resolved = await fetchResolvedText(input);
  if (!resolved.contentType.includes("html")) {
    throw new Error(`Unsupported article content type: ${resolved.contentType}`);
  }
  const dom = new JSDOM(resolved.text, { url: resolved.finalUrl });
  const document = dom.window.document;
  const canonicalHref = document.querySelector('link[rel="canonical"]')?.getAttribute("href")?.trim();
  // A canonical link may be relative, so resolve it against the fetched URL.
  const canonicalUrl = canonicalHref ? normalizeOriginUrl(new URL(canonicalHref, resolved.finalUrl).toString()) : resolved.finalUrl;
  const title = firstMetaContent(document, ['meta[name="citation_title"]', 'meta[property="og:title"]', 'meta[name="twitter:title"]']) ?? (document.title.trim() || canonicalUrl);
  const authors = uniqueStrings([
    ...metaContents(document, ['meta[name="citation_author"]']),
    ...metaContents(document, ['meta[name="author"]', 'meta[property="article:author"]'])
  ]);
  const publishedAt = firstMetaContent(document, [
    'meta[name="citation_publication_date"]',
    'meta[name="citation_online_date"]',
    'meta[property="article:published_time"]',
    'meta[name="pubdate"]'
  ]);
  const updatedAt = firstMetaContent(document, ['meta[property="article:modified_time"]', 'meta[name="lastmod"]']);
  const tags = uniqueStrings([
    ...metaContents(document, ['meta[property="article:tag"]']),
    ...splitKeywords(firstMetaContent(document, ['meta[name="keywords"]']))
  ]);
  // Take the first metadata candidate that actually looks like a DOI (bare or
  // as a doi.org URL); anything else is ignored rather than recorded as one.
  const metaDoi = metaContents(document, ['meta[name="citation_doi"]', 'meta[name="dc.identifier"]'])
    .map((candidate) => doiFromInput(candidate.replace(/^doi:\s*/i, "")))
    .find(Boolean);
  const inferredDoi = extra.doi ?? metaDoi;
  const normalizedOptions = normalizeIngestOptions(options);
  const prepared = await prepareUrlInput(rootDir, canonicalUrl, normalizedOptions);
  if (prepared.sourceKind !== "markdown" && prepared.sourceKind !== "text") {
    throw new Error(`Unsupported prepared article kind: ${prepared.sourceKind}`);
  }
  const body = prepared.extractedText ?? prepared.payloadBytes.toString("utf8");
  const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
  const markdown = [
    ...markdownFrontmatter({
      source_type: extra.sourceType,
      source_url: extra.sourceUrl ?? input,
      canonical_url: canonicalUrl,
      title,
      authors,
      published_at: publishedAt,
      updated_at: updatedAt,
      doi: inferredDoi,
      tags,
      author: options.author,
      contributor: options.contributor,
      captured_at: capturedAt
    }),
    body.trim(),
    "",
    "## Source",
    "",
    `- URL: ${canonicalUrl}`,
    ...extra.sourceType === "doi" && inferredDoi ? [`- DOI: ${inferredDoi}`] : [],
    ""
  ].join("\n");
  return {
    title,
    normalizedUrl: canonicalUrl,
    markdown,
    attachments: prepared.attachments
  };
}
3639
/**
 * Capture the article behind a DOI as markdown.
 *
 * The DOI is extracted with `doiFromInput`, turned into a `https://doi.org/`
 * URL (the DOI is passed through `encodeURIComponent`), and handed to
 * `captureArticleMarkdown` with `sourceType: "doi"`, the original input as
 * `sourceUrl`, and the extracted DOI.
 *
 * @param {string} rootDir - Workspace root, forwarded unchanged.
 * @param {string} input - Bare DOI or DOI resolver URL.
 * @param {object} options - Ingest options, forwarded unchanged.
 * @returns {Promise<object>} Whatever `captureArticleMarkdown` resolves to.
 * @throws {Error} When no DOI can be determined from `input`.
 */
async function captureDoiMarkdown(rootDir, input, options) {
  const doi = doiFromInput(input);
  if (doi) {
    const resolverUrl = `https://doi.org/${encodeURIComponent(doi)}`;
    const extra = { sourceType: "doi", sourceUrl: input, doi };
    return captureArticleMarkdown(rootDir, resolverUrl, options, extra);
  }
  throw new Error(`Could not determine a DOI from ${input}`);
}
3449
3650
  function manifestMatchesOrigin(manifest, prepared) {
3450
3651
  if (prepared.originType === "url") {
3451
3652
  return Boolean(prepared.url && manifest.url && normalizeOriginUrl(manifest.url) === normalizeOriginUrl(prepared.url));
@@ -3789,7 +3990,7 @@ async function persistPreparedInput(rootDir, prepared, paths) {
3789
3990
  const extractionHash = prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact);
3790
3991
  const existingByOrigin = await readManifestByOrigin(paths.manifestsDir, prepared);
3791
3992
  const existingByHash = existingByOrigin ? null : await readManifestByHash(paths.manifestsDir, contentHash);
3792
- if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
3993
+ if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.sourceType === prepared.sourceType && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
3793
3994
  return { manifest: existingByOrigin, isNew: false, wasUpdated: false };
3794
3995
  }
3795
3996
  if (existingByHash) {
@@ -3835,6 +4036,7 @@ async function persistPreparedInput(rootDir, prepared, paths) {
3835
4036
  title: prepared.title,
3836
4037
  originType: prepared.originType,
3837
4038
  sourceKind: prepared.sourceKind,
4039
+ sourceType: prepared.sourceType,
3838
4040
  language: prepared.language,
3839
4041
  originalPath: prepared.originalPath,
3840
4042
  repoRelativePath: prepared.repoRelativePath,
@@ -3892,7 +4094,7 @@ function repoSyncWorkspaceIgnorePaths(rootDir, paths, repoRoot) {
3892
4094
  return candidates.map((candidate) => path9.resolve(candidate)).filter((candidate, index, items) => items.indexOf(candidate) === index).filter((candidate) => withinRoot(repoRoot, candidate));
3893
4095
  }
3894
4096
  function preparedMatchesManifest(manifest, prepared, contentHash) {
3895
- return manifest.contentHash === contentHash && manifest.extractionHash === (prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact)) && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
4097
+ return manifest.contentHash === contentHash && manifest.extractionHash === (prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact)) && manifest.title === prepared.title && manifest.sourceKind === prepared.sourceKind && manifest.sourceType === prepared.sourceType && manifest.language === prepared.language && manifest.mimeType === prepared.mimeType && manifest.repoRelativePath === prepared.repoRelativePath;
3896
4098
  }
3897
4099
  function shouldDeferWatchSemanticRefresh(sourceKind) {
3898
4100
  return sourceKind === "markdown" || sourceKind === "text" || sourceKind === "html" || sourceKind === "pdf" || sourceKind === "image";
@@ -4184,7 +4386,8 @@ async function prepareUrlInput(rootDir, input, options) {
4184
4386
  if (!response.ok) {
4185
4387
  throw new Error(`Failed to fetch ${input}: ${response.status} ${response.statusText}`);
4186
4388
  }
4187
- const inputUrl = new URL(input);
4389
+ const finalUrl = normalizeOriginUrl(response.url || input);
4390
+ const inputUrl = new URL(finalUrl);
4188
4391
  const originalPayloadBytes = Buffer.from(await response.arrayBuffer());
4189
4392
  let payloadBytes = originalPayloadBytes;
4190
4393
  let mimeType = resolveUrlMimeType(input, response);
@@ -4199,13 +4402,13 @@ async function prepareUrlInput(rootDir, input, options) {
4199
4402
  const logDetails = [];
4200
4403
  if (sourceKind === "html" || mimeType.startsWith("text/html")) {
4201
4404
  const html = originalPayloadBytes.toString("utf8");
4202
- const initialConversion = await convertHtmlToMarkdown(html, input);
4405
+ const initialConversion = await convertHtmlToMarkdown(html, finalUrl);
4203
4406
  title = initialConversion.title;
4204
4407
  let localizedHtml = html;
4205
4408
  let localAssetReplacements;
4206
4409
  if (options.includeAssets) {
4207
4410
  const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
4208
- extractHtmlImageReferences(html, input),
4411
+ extractHtmlImageReferences(html, finalUrl),
4209
4412
  options
4210
4413
  );
4211
4414
  if (remoteAttachments.length) {
@@ -4215,19 +4418,19 @@ async function prepareUrlInput(rootDir, input, options) {
4215
4418
  localAssetReplacements = new Map(
4216
4419
  remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
4217
4420
  );
4218
- localizedHtml = rewriteHtmlImageReferences(html, input, localAssetReplacements);
4421
+ localizedHtml = rewriteHtmlImageReferences(html, finalUrl, localAssetReplacements);
4219
4422
  logDetails.push(`remote_assets=${remoteAttachments.length}`);
4220
4423
  }
4221
4424
  if (skippedCount) {
4222
4425
  logDetails.push(`remote_asset_skips=${skippedCount}`);
4223
4426
  }
4224
4427
  }
4225
- const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml, input);
4428
+ const converted = localizedHtml === html && !attachments?.length ? initialConversion : await convertHtmlToMarkdown(localizedHtml, finalUrl);
4226
4429
  extractedText = converted.markdown;
4227
4430
  extractionArtifact = createHtmlReadabilityExtractionArtifact("markdown", "text/markdown");
4228
4431
  if (localAssetReplacements?.size) {
4229
4432
  const absoluteLocalAssetReplacements = new Map(
4230
- [...localAssetReplacements.values()].map((replacement) => [new URL(replacement, input).toString(), replacement])
4433
+ [...localAssetReplacements.values()].map((replacement) => [new URL(replacement, finalUrl).toString(), replacement])
4231
4434
  );
4232
4435
  extractedText = rewriteMarkdownImageTargets(extractedText, absoluteLocalAssetReplacements);
4233
4436
  }
@@ -4244,7 +4447,7 @@ async function prepareUrlInput(rootDir, input, options) {
4244
4447
  extractionArtifact = createPlainTextExtractionArtifact(sourceKind, mimeType);
4245
4448
  if (sourceKind === "markdown" && options.includeAssets) {
4246
4449
  const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
4247
- extractMarkdownImageReferences(extractedText, input),
4450
+ extractMarkdownImageReferences(extractedText, finalUrl),
4248
4451
  options
4249
4452
  );
4250
4453
  if (remoteAttachments.length) {
@@ -4254,7 +4457,7 @@ async function prepareUrlInput(rootDir, input, options) {
4254
4457
  const replacements = new Map(
4255
4458
  remoteAttachments.map((attachment) => [attachment.originalPath ?? "", `../assets/${sourceId}/${attachment.relativePath}`])
4256
4459
  );
4257
- extractedText = rewriteMarkdownImageReferences(extractedText, input, replacements);
4460
+ extractedText = rewriteMarkdownImageReferences(extractedText, finalUrl, replacements);
4258
4461
  payloadBytes = Buffer.from(extractedText, "utf8");
4259
4462
  logDetails.push(`remote_assets=${remoteAttachments.length}`);
4260
4463
  }
@@ -4282,7 +4485,7 @@ async function prepareUrlInput(rootDir, input, options) {
4282
4485
  originType: "url",
4283
4486
  sourceKind,
4284
4487
  language,
4285
- url: input,
4488
+ url: finalUrl,
4286
4489
  mimeType,
4287
4490
  storedExtension,
4288
4491
  payloadBytes,
@@ -4395,8 +4598,8 @@ async function ingestInput(rootDir, input, options) {
4395
4598
  }
4396
4599
  async function addInput(rootDir, input, options = {}) {
4397
4600
  const { paths } = await initWorkspace(rootDir);
4398
- if (!isHttpUrl(input) && !arxivIdFromInput(input)) {
4399
- throw new Error("`swarmvault add` only supports URLs and bare arXiv ids in the current release.");
4601
+ if (!isHttpUrl(input) && !arxivIdFromInput(input) && !doiFromInput(input)) {
4602
+ throw new Error("`swarmvault add` only supports URLs, bare arXiv ids, and bare DOI strings in the current release.");
4400
4603
  }
4401
4604
  let prepared = null;
4402
4605
  let captureType = "url";
@@ -4409,26 +4612,55 @@ async function addInput(rootDir, input, options = {}) {
4409
4612
  title: captured.title,
4410
4613
  url: captured.normalizedUrl,
4411
4614
  markdown: captured.markdown,
4615
+ sourceType: "arxiv",
4412
4616
  logDetails: ["capture_type=arxiv"]
4413
4617
  });
4414
4618
  captureType = "arxiv";
4415
4619
  normalizedUrl = captured.normalizedUrl;
4620
+ } else if (doiFromInput(input)) {
4621
+ const captured = await captureDoiMarkdown(rootDir, input, options);
4622
+ prepared = prepareCapturedMarkdownInput({
4623
+ title: captured.title,
4624
+ url: captured.normalizedUrl,
4625
+ markdown: captured.markdown,
4626
+ sourceType: "doi",
4627
+ attachments: captured.attachments,
4628
+ logDetails: ["capture_type=doi"]
4629
+ });
4630
+ captureType = "doi";
4631
+ normalizedUrl = captured.normalizedUrl;
4416
4632
  } else if (isTweetUrl(input)) {
4417
4633
  const captured = await captureTweetMarkdown(input, options);
4418
4634
  prepared = prepareCapturedMarkdownInput({
4419
4635
  title: captured.title,
4420
4636
  url: captured.normalizedUrl,
4421
4637
  markdown: captured.markdown,
4638
+ sourceType: "tweet",
4422
4639
  logDetails: ["capture_type=tweet"]
4423
4640
  });
4424
4641
  captureType = "tweet";
4425
4642
  normalizedUrl = captured.normalizedUrl;
4643
+ } else if (isHttpUrl(input)) {
4644
+ const captured = await captureArticleMarkdown(rootDir, input, options, {
4645
+ sourceType: "article",
4646
+ sourceUrl: input
4647
+ });
4648
+ prepared = prepareCapturedMarkdownInput({
4649
+ title: captured.title,
4650
+ url: captured.normalizedUrl,
4651
+ markdown: captured.markdown,
4652
+ sourceType: "article",
4653
+ attachments: captured.attachments,
4654
+ logDetails: ["capture_type=article"]
4655
+ });
4656
+ captureType = "article";
4657
+ normalizedUrl = captured.normalizedUrl;
4426
4658
  }
4427
4659
  } catch {
4428
4660
  fallback = true;
4429
4661
  }
4430
4662
  if (!prepared) {
4431
- normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : normalizeOriginUrl(input);
4663
+ normalizedUrl = arxivIdFromInput(input) ? `https://arxiv.org/abs/${arxivIdFromInput(input)}` : doiFromInput(input) ? `https://doi.org/${encodeURIComponent(doiFromInput(input) ?? "")}` : normalizeOriginUrl(input);
4432
4664
  return {
4433
4665
  captureType: "url",
4434
4666
  manifest: await ingestInput(rootDir, normalizedUrl, options),
@@ -4684,7 +4916,7 @@ function buildSchemaPrompt(schema, instruction) {
4684
4916
  // src/vault.ts
4685
4917
  import fs15 from "fs/promises";
4686
4918
  import path18 from "path";
4687
- import matter8 from "gray-matter";
4919
+ import matter9 from "gray-matter";
4688
4920
  import { z as z7 } from "zod";
4689
4921
 
4690
4922
  // src/analysis.ts
@@ -4991,6 +5223,7 @@ var DEFAULT_BENCHMARK_QUESTIONS = [
4991
5223
  "Where are the biggest knowledge gaps?",
4992
5224
  "What evidence should I read first?"
4993
5225
  ];
5226
+ var RESEARCH_BENCHMARK_QUESTION = "Which research sources should I read first, and why?";
4994
5227
  function nodeMap(graph) {
4995
5228
  return new Map(graph.nodes.map((node) => [node.id, node]));
4996
5229
  }
@@ -5040,9 +5273,68 @@ function benchmarkQueryTokens(graph, queryResult, pageContentsById) {
5040
5273
  queryTokens,
5041
5274
  reduction: 0,
5042
5275
  visitedNodeIds: queryResult.visitedNodeIds,
5276
+ visitedEdgeIds: queryResult.visitedEdgeIds,
5043
5277
  pageIds: queryResult.pageIds
5044
5278
  };
5045
5279
  }
5280
/**
 * Compute a hash over a normalised projection of the graph.
 *
 * The projection contains selected fields of nodes, edges, pages and
 * communities. Each collection is sorted by `id` (via `localeCompare`) and
 * each id-list field is copied and sorted with the default sort, so the
 * input ordering of those collections does not affect the result. Pages of
 * kind `graph_report` and `community_summary` are left out. The projection is
 * serialised as compact JSON and passed to `sha256`.
 *
 * @param {object} graph - Graph with `nodes`, `edges`, `pages` and optional
 *   `communities`.
 * @returns {*} Whatever `sha256` returns for the serialised projection.
 */
function graphHash(graph) {
  const byId = (left, right) => left.id.localeCompare(right.id);
  const sortedCopy = (items) => [...items].sort();

  const hashedPages = graph.pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary");

  const nodes = Array.from(graph.nodes, (node) => ({
    id: node.id,
    type: node.type,
    label: node.label,
    pageId: node.pageId ?? null,
    communityId: node.communityId ?? null,
    degree: node.degree ?? null,
    bridgeScore: node.bridgeScore ?? null,
    isGodNode: node.isGodNode ?? false,
    sourceIds: sortedCopy(node.sourceIds),
    projectIds: sortedCopy(node.projectIds)
  })).sort(byId);

  const edges = Array.from(graph.edges, (edge) => ({
    id: edge.id,
    source: edge.source,
    target: edge.target,
    relation: edge.relation,
    status: edge.status,
    evidenceClass: edge.evidenceClass,
    confidence: edge.confidence,
    provenance: sortedCopy(edge.provenance)
  })).sort(byId);

  const pages = Array.from(hashedPages, (page) => ({
    id: page.id,
    path: page.path,
    kind: page.kind,
    status: page.status,
    sourceType: page.sourceType ?? null,
    sourceIds: sortedCopy(page.sourceIds),
    projectIds: sortedCopy(page.projectIds),
    nodeIds: sortedCopy(page.nodeIds)
  })).sort(byId);

  const communities = Array.from(graph.communities ?? [], (community) => ({
    id: community.id,
    label: community.label,
    nodeIds: sortedCopy(community.nodeIds)
  })).sort(byId);

  // Key order below is part of the serialised form and therefore of the hash.
  const normalized = JSON.stringify({ nodes, edges, pages, communities }, null, 0);
  return sha256(normalized);
}
5327
/**
 * Report whether any page is a source page with a source type other than
 * `"url"`.
 *
 * @param {Array<{ kind: string, sourceType?: string }>} pages - Pages to scan.
 * @returns {boolean} `true` if at least one page has `kind === "source"` and
 *   a truthy `sourceType` different from `"url"`.
 */
function hasResearchSources(pages) {
  const isResearchSource = (page) => {
    if (page.kind !== "source") {
      return false;
    }
    if (!page.sourceType) {
      return false;
    }
    return page.sourceType !== "url";
  };
  return pages.some(isResearchSource);
}
5330
/**
 * Choose the default benchmark questions for a graph.
 *
 * Starts from `DEFAULT_BENCHMARK_QUESTIONS`; when `hasResearchSources` is
 * true for the graph's pages, `RESEARCH_BENCHMARK_QUESTION` is placed first.
 * The list is de-duplicated with `uniqueBy` and truncated to a limit that is
 * `maxQuestions` clamped to between 1 and the number of default questions.
 *
 * @param {{ pages: Array<object> }} graph - Graph whose pages are inspected.
 * @param {number} [maxQuestions=3] - Requested number of questions.
 * @returns {string[]} The selected questions.
 */
function defaultBenchmarkQuestionsForGraph(graph, maxQuestions = 3) {
  const cappedLimit = Math.min(maxQuestions, DEFAULT_BENCHMARK_QUESTIONS.length);
  const normalizedLimit = Math.max(1, cappedLimit);
  const candidates = hasResearchSources(graph.pages)
    ? [RESEARCH_BENCHMARK_QUESTION, ...DEFAULT_BENCHMARK_QUESTIONS]
    : [...DEFAULT_BENCHMARK_QUESTIONS];
  return uniqueBy(candidates, (item) => item).slice(0, normalizedLimit);
}
5046
5338
  function buildBenchmarkArtifact(input) {
5047
5339
  const corpusTokens = Math.max(1, Math.round(input.corpusWords * (100 / 75)));
5048
5340
  const perQuestion = input.perQuestion.filter((entry) => entry.queryTokens > 0).map((entry) => ({
@@ -5051,8 +5343,18 @@ function buildBenchmarkArtifact(input) {
5051
5343
  }));
5052
5344
  const avgQueryTokens = perQuestion.length ? Math.max(1, Math.round(perQuestion.reduce((total, entry) => total + entry.queryTokens, 0) / perQuestion.length)) : 0;
5053
5345
  const reductionRatio = avgQueryTokens ? Number(Math.max(0, 1 - avgQueryTokens / Math.max(1, corpusTokens)).toFixed(3)) : 0;
5346
+ const uniqueVisitedNodes = new Set(perQuestion.flatMap((entry) => entry.visitedNodeIds)).size;
5347
+ const summary = {
5348
+ questionCount: input.questions.length,
5349
+ uniqueVisitedNodes,
5350
+ finalContextTokens: avgQueryTokens,
5351
+ naiveCorpusTokens: corpusTokens,
5352
+ avgReduction: reductionRatio,
5353
+ reductionRatio
5354
+ };
5054
5355
  return {
5055
5356
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
5357
+ graphHash: graphHash(input.graph),
5056
5358
  corpusWords: input.corpusWords,
5057
5359
  corpusTokens,
5058
5360
  nodes: input.graph.nodes.length,
@@ -5060,7 +5362,9 @@ function buildBenchmarkArtifact(input) {
5060
5362
  avgQueryTokens,
5061
5363
  reductionRatio,
5062
5364
  sampleQuestions: input.questions,
5063
- perQuestion
5365
+ perQuestion,
5366
+ questionResults: perQuestion,
5367
+ summary
5064
5368
  };
5065
5369
  }
5066
5370
 
@@ -5083,7 +5387,7 @@ function conflictConfidence(claimA, claimB) {
5083
5387
  // src/deep-lint.ts
5084
5388
  import fs11 from "fs/promises";
5085
5389
  import path14 from "path";
5086
- import matter3 from "gray-matter";
5390
+ import matter4 from "gray-matter";
5087
5391
  import { z as z5 } from "zod";
5088
5392
 
5089
5393
  // src/findings.ts
@@ -5450,7 +5754,7 @@ async function loadContextPages(rootDir, graph) {
5450
5754
  contextPages.slice(0, 18).map(async (page) => {
5451
5755
  const absolutePath = path14.join(paths.wikiDir, page.path);
5452
5756
  const raw = await fs11.readFile(absolutePath, "utf8").catch(() => "");
5453
- const parsed = matter3(raw);
5757
+ const parsed = matter4(raw);
5454
5758
  return {
5455
5759
  id: page.id,
5456
5760
  title: page.title,
@@ -5674,6 +5978,331 @@ async function runDeepLint(rootDir, structuralFindings, options = {}) {
5674
5978
  );
5675
5979
  }
5676
5980
 
5981
+ // src/graph-enrichment.ts
5982
// Lower-case filler words that themeTokens() excludes from rationale themes.
var STOPWORDS2 = /* @__PURE__ */ new Set([
  "about", "after", "also", "among", "and", "around", "because", "been",
  "being", "between", "both", "does", "from", "into", "just", "like",
  "many", "more", "most", "much", "note", "only", "other", "over",
  "same", "such", "than", "that", "their", "them", "there", "these",
  "this", "through", "under", "very", "what", "when", "where", "which",
  "while", "with", "would", "your"
]);
6028
/**
 * Canonical form used when comparing feature values: the input is passed
 * through normalizeWhitespace() and then lower-cased.
 *
 * @param {string} value - Raw text.
 * @returns {string} Normalized, lower-case text.
 */
function normalizeValue(value) {
  const collapsed = normalizeWhitespace(value);
  return collapsed.toLowerCase();
}
6031
/**
 * Records one normalized feature value under `reason` in `bucket`.
 *
 * Falsy values, and values that normalize to an empty string, are ignored.
 *
 * @param {Map<string, Set<string>>} bucket - Feature map, mutated in place.
 * @param {string} reason - Feature category (e.g. "shared_concept").
 * @param {string|undefined|null} value - Raw feature value.
 * @returns {void}
 */
function addFeature(bucket, reason, value) {
  const normalized = value ? normalizeValue(value) : "";
  if (!normalized) {
    return;
  }
  let values = bucket.get(reason);
  if (!bucket.has(reason)) {
    values = /* @__PURE__ */ new Set();
    bucket.set(reason, values);
  }
  values?.add(normalized);
}
6044
/**
 * Extracts up to six distinct theme tokens from free text.
 *
 * The text is normalized, split on runs of non-alphanumeric characters, and
 * tokens shorter than four characters or listed in STOPWORDS2 are dropped.
 *
 * @param {string} value - Text to tokenize.
 * @returns {string[]} At most six unique tokens, in first-seen order.
 */
function themeTokens(value) {
  const candidates = [];
  for (const token of normalizeValue(value).split(/[^a-z0-9]+/i)) {
    if (token.length < 4 || STOPWORDS2.has(token)) {
      continue;
    }
    candidates.push(token);
  }
  return uniqueBy(candidates, (token) => token).slice(0, 6);
}
6050
/**
 * Builds an order-independent key for an unordered pair of IDs by sorting the
 * two values with localeCompare and joining them with "|".
 *
 * @param {string} left - First ID.
 * @param {string} right - Second ID.
 * @returns {string} "<smaller>|<larger>".
 */
function pairKey(left, right) {
  const byLocale = (a, b) => a.localeCompare(b);
  const ordered = [left, right];
  ordered.sort(byLocale);
  return ordered.join("|");
}
6053
/**
 * Decides whether two nodes come from different scopes.
 *
 * They do when both have a page ID and those differ, or when either node
 * lists a source ID that the other does not.
 *
 * @param {{pageId?: string, sourceIds: string[]}} left - First node.
 * @param {{pageId?: string, sourceIds: string[]}} right - Second node.
 * @returns {boolean} True when the nodes are not scoped identically.
 */
function hasDistinctScope(left, right) {
  const onDifferentPages = Boolean(left.pageId && right.pageId) && left.pageId !== right.pageId;
  if (onDifferentPages) {
    return true;
  }
  const leftSources = new Set(left.sourceIds);
  const rightSources = new Set(right.sourceIds);
  // True when `own` holds at least one ID absent from `other`.
  const hasExclusive = (own, other) => {
    for (const sourceId of own) {
      if (!other.has(sourceId)) {
        return true;
      }
    }
    return false;
  };
  return hasExclusive(leftSources, rightSources) || hasExclusive(rightSources, leftSources);
}
6063
/**
 * Returns the `size` of a collection, or 0 when the collection (or its size)
 * is null/undefined.
 *
 * @param {{size?: number}|null|undefined} values - Typically a Set of feature values.
 * @returns {number} Number of supporting values.
 */
function supportCount(values) {
  if (values == null) {
    return 0;
  }
  const { size } = values;
  return size ?? 0;
}
6066
/**
 * Turns the shared-feature evidence for a node pair into a confidence score.
 *
 * Each present category contributes a base weight plus a capped increment per
 * additional shared value; "shared_source_type" contributes a flat 0.1. A
 * bonus is added when two (0.04) or three or more (0.08) categories are
 * present. The result is capped at 0.96.
 *
 * @param {Map<string, Set<string>>} reasons - Shared values keyed by category.
 * @returns {number} Score in the range [0, 0.96].
 */
function similarityScore(reasons) {
  const countFor = (reason) => supportCount(reasons.get(reason));
  // base + min(cap, (count - 1) * step) when the category is present, else 0.
  const tiered = (count, base, cap, step) => (count ? base + Math.min(cap, (count - 1) * step) : 0);

  const concept = countFor("shared_concept");
  const entity = countFor("shared_entity");
  const symbol = countFor("shared_symbol");
  const rationale = countFor("shared_rationale_theme");
  const sourceType = countFor("shared_source_type");
  const tag = countFor("shared_tag");

  // Terms are accumulated in the same left-to-right order every time so the
  // floating-point result is stable.
  let weighted = tiered(concept, 0.46, 0.12, 0.04);
  weighted += tiered(entity, 0.34, 0.1, 0.03);
  weighted += tiered(symbol, 0.24, 0.08, 0.02);
  weighted += tiered(rationale, 0.18, 0.08, 0.03);
  weighted += sourceType ? 0.1 : 0;
  weighted += tiered(tag, 0.12, 0.04, 0.02);

  const categoryCount = reasons.size;
  let categoryBonus = 0;
  if (categoryCount >= 3) {
    categoryBonus = 0.08;
  } else if (categoryCount === 2) {
    categoryBonus = 0.04;
  }
  return Math.min(0.96, weighted + categoryBonus);
}
6078
/**
 * Renders a one-sentence explanation for a similarity edge.
 *
 * @param {string[]|undefined|null} reasons - Similarity reason codes.
 * @returns {string} A generic sentence when no reasons are given; otherwise a
 *   sentence listing a label per reason. Unrecognized codes are labelled
 *   "shared tags".
 */
function describeSimilarityReasons(reasons) {
  if (!reasons?.length) {
    return "This link is inferred from multiple shared graph features.";
  }
  const knownLabels = /* @__PURE__ */ new Map([
    ["shared_concept", "shared concepts"],
    ["shared_entity", "shared entities"],
    ["shared_symbol", "shared symbols"],
    ["shared_rationale_theme", "shared rationale themes"],
    ["shared_source_type", "shared source type"]
  ]);
  const labels = reasons.map((reason) => knownLabels.get(reason) ?? "shared tags");
  return `This link is inferred from ${labels.join(", ")}.`;
}
6087
/**
 * Builds the comparable feature set for every node that can take part in
 * similarity matching.
 *
 * Symbol, concept and entity nodes are skipped. Source and module nodes
 * gather concepts, entities, source type, symbols (exported ones when any
 * exist, at most 12) and rationale theme tokens from each analysed source.
 * Rationale nodes gather source type and the theme tokens of their own text
 * (falling back to the node label). Nodes that end up with no features are
 * dropped.
 *
 * @param {Array} nodes - Graph nodes.
 * @param {Array} manifests - Source manifests, keyed here by `sourceId`.
 * @param {Array} analyses - Source analyses, keyed here by `sourceId`.
 * @returns {Array<{node: object, featureValues: Map<string, Set<string>>}>}
 */
function nodeContexts(nodes, manifests, analyses) {
  const manifestsBySourceId = new Map(manifests.map((manifest) => [manifest.sourceId, manifest]));
  const analysesBySourceId = new Map(analyses.map((analysis) => [analysis.sourceId, analysis]));
  const skippedTypes = /* @__PURE__ */ new Set(["symbol", "concept", "entity"]);

  // Features contributed by one analysed source to a source/module node.
  const collectSourceFeatures = (features, sourceId) => {
    const analysis = analysesBySourceId.get(sourceId);
    const manifest = manifestsBySourceId.get(sourceId);
    if (!analysis) {
      return;
    }
    for (const concept of analysis.concepts) {
      addFeature(features, "shared_concept", concept.name);
    }
    for (const entity of analysis.entities) {
      addFeature(features, "shared_entity", entity.name);
    }
    if (manifest?.sourceType) {
      addFeature(features, "shared_source_type", manifest.sourceType);
    }
    if (analysis.code) {
      const allSymbols = analysis.code.symbols;
      const exportedSymbols = allSymbols.filter((symbol) => symbol.exported);
      // Prefer the exported surface; fall back to every symbol when nothing is exported.
      const preferred = exportedSymbols.length ? exportedSymbols : allSymbols;
      for (const symbol of preferred.slice(0, 12)) {
        addFeature(features, "shared_symbol", symbol.name);
      }
    }
    for (const rationale of analysis.rationales) {
      for (const token of themeTokens(rationale.text)) {
        addFeature(features, "shared_rationale_theme", token);
      }
    }
  };

  // Features contributed by one source to a rationale node.
  const collectRationaleFeatures = (features, node, sourceId) => {
    const analysis = analysesBySourceId.get(sourceId);
    const manifest = manifestsBySourceId.get(sourceId);
    if (manifest?.sourceType) {
      addFeature(features, "shared_source_type", manifest.sourceType);
    }
    const rationale = analysis?.rationales.find((item) => item.id === node.id);
    for (const token of themeTokens(rationale?.text ?? node.label)) {
      addFeature(features, "shared_rationale_theme", token);
    }
  };

  return nodes.flatMap((node) => {
    if (skippedTypes.has(node.type)) {
      return [];
    }
    const features = /* @__PURE__ */ new Map();
    switch (node.type) {
      case "source":
      case "module":
        for (const sourceId of node.sourceIds) {
          collectSourceFeatures(features, sourceId);
        }
        break;
      case "rationale":
        for (const sourceId of node.sourceIds) {
          collectRationaleFeatures(features, node, sourceId);
        }
        break;
      default:
        break;
    }
    return features.size > 0 ? [{ node, featureValues: features }] : [];
  });
}
6136
/**
 * Infers "semantically_similar_to" edges between nodes that share features.
 *
 * For each feature category, nodes of the same type that share a value are
 * grouped; every pair in a group that has a distinct scope (see
 * hasDistinctScope) and is not already directly connected accumulates that
 * value as evidence. Pairs whose similarityScore() is at least 0.5 become
 * edges.
 *
 * The node pair and the shared value are kept as structured data next to the
 * lookup keys. They are never recovered by splitting a "|"-joined key or
 * slicing a composed bucket string, so node IDs that contain "|" still
 * resolve to the right nodes.
 *
 * @param {Array} nodes - Graph nodes.
 * @param {Array<{source: string, target: string}>} edges - Existing edges; pairs already linked are skipped.
 * @param {Array} manifests - Source manifests (forwarded to nodeContexts).
 * @param {Array} analyses - Source analyses (forwarded to nodeContexts).
 * @returns {Array<object>} Inferred edges sorted by descending confidence, then by id.
 */
function buildSemanticSimilarityEdges(nodes, edges, manifests, analyses) {
  const contexts = nodeContexts(nodes, manifests, analyses);
  const contextsById = new Map(contexts.map((context) => [context.node.id, context]));
  const directPairs = new Set(edges.map((edge) => pairKey(edge.source, edge.target)));
  // pair key -> { left, right, reasons: Map<reason, Set<value>> }
  const pairs = /* @__PURE__ */ new Map();
  const byLocale = (a, b) => a.localeCompare(b);

  for (const reason of ["shared_concept", "shared_entity", "shared_symbol", "shared_rationale_theme", "shared_source_type"]) {
    // Buckets are scoped to one reason, so node type + value is enough to
    // identify a group. The value travels with the bucket.
    const buckets = /* @__PURE__ */ new Map();
    for (const context of contexts) {
      for (const value of context.featureValues.get(reason) ?? []) {
        const bucketId = `${context.node.type}:${value}`;
        let bucket = buckets.get(bucketId);
        if (!bucket) {
          bucket = { value, nodeIds: [] };
          buckets.set(bucketId, bucket);
        }
        bucket.nodeIds.push(context.node.id);
      }
    }

    for (const { value, nodeIds } of buckets.values()) {
      if (nodeIds.length < 2) {
        continue;
      }
      const uniqueNodeIds = uniqueBy(nodeIds, (nodeId) => nodeId).sort(byLocale);
      for (let index = 0; index < uniqueNodeIds.length; index++) {
        const left = contextsById.get(uniqueNodeIds[index]);
        if (!left) {
          continue;
        }
        for (let cursor = index + 1; cursor < uniqueNodeIds.length; cursor++) {
          const right = contextsById.get(uniqueNodeIds[cursor]);
          if (!right || !hasDistinctScope(left.node, right.node)) {
            continue;
          }
          const key = pairKey(left.node.id, right.node.id);
          if (directPairs.has(key)) {
            continue;
          }
          let pair = pairs.get(key);
          if (!pair) {
            // IDs are iterated in localeCompare order, so `left` is the
            // smaller ID and matches the order pairKey() encodes.
            pair = { left: left.node, right: right.node, reasons: /* @__PURE__ */ new Map() };
            pairs.set(key, pair);
          }
          let values = pair.reasons.get(reason);
          if (!values) {
            values = /* @__PURE__ */ new Set();
            pair.reasons.set(reason, values);
          }
          values.add(value);
        }
      }
    }
  }

  const similarityEdges = [];
  for (const { left, right, reasons } of pairs.values()) {
    const confidence = similarityScore(reasons);
    if (confidence < 0.5) {
      continue;
    }
    similarityEdges.push({
      id: `similar:${sha256(`${left.id}|${right.id}|${[...reasons.keys()].sort().join(",")}`).slice(0, 16)}`,
      source: left.id,
      target: right.id,
      relation: "semantically_similar_to",
      status: "inferred",
      evidenceClass: "inferred",
      confidence,
      provenance: uniqueBy(
        [...left.sourceIds, ...right.sourceIds].sort(byLocale),
        (value) => value
      ),
      similarityReasons: [...reasons.keys()].sort(byLocale)
    });
  }
  return similarityEdges.sort((left, right) => right.confidence - left.confidence || left.id.localeCompare(right.id));
}
6212
/**
 * Builds "participate_in" hyperedges for concepts/entities mentioned by at
 * least three distinct source nodes.
 *
 * Only edges with relation "mentions" and evidence class "extracted" that go
 * from a source node to a concept or entity node are counted.
 *
 * @param {{nodes: Array, edges: Array}} graph - Graph to scan.
 * @returns {Array<object>} One hyperedge per qualifying anchor node.
 */
function buildTopicHyperedges(graph) {
  const nodesById = new Map(graph.nodes.map((node) => [node.id, node]));
  const connectedSources = /* @__PURE__ */ new Map();
  const isAnchor = (node) => node?.type === "concept" || node?.type === "entity";

  for (const edge of graph.edges) {
    const isExtractedMention = edge.relation === "mentions" && edge.evidenceClass === "extracted";
    if (!isExtractedMention) {
      continue;
    }
    const sourceNode = nodesById.get(edge.source);
    const targetNode = nodesById.get(edge.target);
    if (sourceNode?.type !== "source" || !isAnchor(targetNode)) {
      continue;
    }
    let members = connectedSources.get(targetNode.id);
    if (!members) {
      members = [];
      connectedSources.set(targetNode.id, members);
    }
    members.push(sourceNode.id);
  }

  const hyperedges = [];
  for (const [anchorId, members] of connectedSources) {
    const anchor = nodesById.get(anchorId);
    const uniqueMembers = uniqueBy(members, (member) => member).sort((left, right) => left.localeCompare(right));
    if (!anchor || uniqueMembers.length < 3) {
      continue;
    }
    const nodeIds = [anchor.id, ...uniqueMembers];
    const pageIds = [];
    for (const nodeId of nodeIds) {
      const pageId = nodesById.get(nodeId)?.pageId;
      if (pageId) {
        pageIds.push(pageId);
      }
    }
    const sourcePageIds = uniqueBy(pageIds, (value) => value);
    const digest = sha256(`participate_in|${anchor.id}|${uniqueMembers.join("|")}`).slice(0, 16);
    hyperedges.push({
      id: `hyper:${digest}`,
      label: anchor.label,
      relation: "participate_in",
      nodeIds,
      evidenceClass: "extracted",
      // Confidence grows with the number of converging sources, capped at 0.96.
      confidence: Math.min(0.96, 0.72 + uniqueMembers.length * 0.06),
      sourcePageIds,
      why: `${uniqueMembers.length} source nodes converge on ${anchor.label} through extracted mention edges.`
    });
  }
  return hyperedges;
}
6251
/**
 * Builds "form" hyperedges that group a module with the symbols it defines.
 *
 * Only edges with relation "defines" and evidence class "extracted" that go
 * from a module node to a symbol node are counted, and a module needs at
 * least three distinct defined symbols to produce a hyperedge.
 *
 * @param {{nodes: Array, edges: Array}} graph - Graph to scan.
 * @returns {Array<object>} One hyperedge per qualifying module node.
 */
function buildModuleFormHyperedges(graph) {
  const nodesById = new Map(graph.nodes.map((node) => [node.id, node]));
  const definedSymbols = /* @__PURE__ */ new Map();

  for (const edge of graph.edges) {
    if (edge.relation !== "defines" || edge.evidenceClass !== "extracted") {
      continue;
    }
    const moduleNode = nodesById.get(edge.source);
    const symbolNode = nodesById.get(edge.target);
    const isModuleToSymbol = moduleNode?.type === "module" && symbolNode?.type === "symbol";
    if (!isModuleToSymbol) {
      continue;
    }
    const known = definedSymbols.get(moduleNode.id);
    if (known) {
      known.push(symbolNode.id);
    } else {
      definedSymbols.set(moduleNode.id, [symbolNode.id]);
    }
  }

  const byLocale = (left, right) => left.localeCompare(right);
  const hyperedges = [];
  definedSymbols.forEach((members, moduleId) => {
    const moduleNode = nodesById.get(moduleId);
    const uniqueMembers = uniqueBy(members, (member) => member).sort(byLocale);
    if (!moduleNode || uniqueMembers.length < 3) {
      return;
    }
    const nodeIds = [moduleNode.id, ...uniqueMembers];
    const pageIds = nodeIds.map((nodeId) => nodesById.get(nodeId)?.pageId).filter((pageId) => Boolean(pageId));
    const digest = sha256(`form|${moduleNode.id}|${uniqueMembers.join("|")}`).slice(0, 16);
    hyperedges.push({
      id: `hyper:${digest}`,
      label: `${moduleNode.label} API`,
      relation: "form",
      nodeIds,
      evidenceClass: "extracted",
      // Confidence grows with the number of defined symbols, capped at 0.98.
      confidence: Math.min(0.98, 0.78 + uniqueMembers.length * 0.04),
      sourcePageIds: uniqueBy(pageIds, (value) => value),
      why: `${moduleNode.label} and ${uniqueMembers.length} defined symbols form one local module surface.`
    });
  });
  return hyperedges;
}
6290
/**
 * Adds inferred similarity edges and derived hyperedges to a graph.
 *
 * Hyperedges are computed against the enriched edge list, ordered by
 * descending confidence then label, and de-duplicated by id.
 *
 * @param {{nodes: Array, edges: Array}} graph - Base graph (not mutated here).
 * @param {Array} manifests - Source manifests.
 * @param {Array} analyses - Source analyses.
 * @returns {{edges: Array, hyperedges: Array}} Enriched edges (sorted by id) and hyperedges.
 */
function enrichGraph(graph, manifests, analyses) {
  const byId = (left, right) => left.id.localeCompare(right.id);
  const byConfidenceThenLabel = (left, right) => right.confidence - left.confidence || left.label.localeCompare(right.label);

  const similarityEdges = buildSemanticSimilarityEdges(graph.nodes, graph.edges, manifests, analyses);
  const enrichedEdges = [...graph.edges, ...similarityEdges].sort(byId);

  // Each builder receives its own view of the graph with the enriched edges.
  const enrichedView = () => ({ ...graph, edges: enrichedEdges, hyperedges: [] });
  const topicHyperedges = buildTopicHyperedges(enrichedView());
  const moduleHyperedges = buildModuleFormHyperedges(enrichedView());

  const ranked = [...topicHyperedges, ...moduleHyperedges].sort(byConfidenceThenLabel);
  const hyperedges = uniqueBy(ranked, (hyperedge) => hyperedge.id);
  return { edges: enrichedEdges, hyperedges };
}
6305
+
5677
6306
  // src/graph-tools.ts
5678
6307
  function normalizeTarget(value) {
5679
6308
  return normalizeWhitespace(value).toLowerCase();
@@ -5684,6 +6313,9 @@ function nodeById(graph) {
5684
6313
  function pageById(graph) {
5685
6314
  return new Map(graph.pages.map((page) => [page.id, page]));
5686
6315
  }
6316
/**
 * Lists the hyperedges that include a given node, strongest first.
 *
 * @param {{hyperedges?: Array}} graph - Graph; a missing `hyperedges` list is treated as empty.
 * @param {string} nodeId - Node to look up.
 * @returns {Array<object>} New array ordered by descending confidence, then label.
 */
function hyperedgesForNode(graph, nodeId) {
  const touching = [];
  for (const hyperedge of graph.hyperedges ?? []) {
    if (hyperedge.nodeIds.includes(nodeId)) {
      touching.push(hyperedge);
    }
  }
  touching.sort((left, right) => {
    const byConfidence = right.confidence - left.confidence;
    return byConfidence || left.label.localeCompare(right.label);
  });
  return touching;
}
5687
6319
  function scoreMatch(query, candidate) {
5688
6320
  const normalizedQuery = normalizeTarget(query);
5689
6321
  const normalizedCandidate = normalizeTarget(candidate);
@@ -5732,6 +6364,14 @@ function nodeMatches(graph, query) {
5732
6364
  score: Math.max(scoreMatch(query, node.label), scoreMatch(query, node.id))
5733
6365
  })).filter((match) => match.score > 0).sort((left, right) => right.score - left.score || left.label.localeCompare(right.label));
5734
6366
  }
6367
/**
 * Scores every hyperedge against a query and returns the positive matches.
 *
 * A hyperedge's score is the best scoreMatch() of the query against its
 * label, its `why` text and its relation.
 *
 * @param {{hyperedges?: Array}} graph - Graph; a missing `hyperedges` list is treated as empty.
 * @param {string} query - Search text.
 * @returns {Array<{type: "hyperedge", id: string, label: string, score: number}>}
 *   Matches ordered by descending score, then label.
 */
function hyperedgeMatches(graph, query) {
  const matches = [];
  for (const hyperedge of graph.hyperedges ?? []) {
    const score = Math.max(
      scoreMatch(query, hyperedge.label),
      scoreMatch(query, hyperedge.why),
      scoreMatch(query, hyperedge.relation)
    );
    if (score > 0) {
      matches.push({ type: "hyperedge", id: hyperedge.id, label: hyperedge.label, score });
    }
  }
  matches.sort((left, right) => right.score - left.score || left.label.localeCompare(right.label));
  return matches;
}
5735
6375
  function graphAdjacency(graph) {
5736
6376
  const adjacency = /* @__PURE__ */ new Map();
5737
6377
  const push = (nodeId, item) => {
@@ -5780,14 +6420,15 @@ function queryGraph(graph, question, searchResults, options) {
5780
6420
  const traversal = options?.traversal ?? "bfs";
5781
6421
  const budget = Math.max(3, Math.min(options?.budget ?? 12, 50));
5782
6422
  const matches = uniqueBy(
5783
- [...pageSearchMatches(graph, question, searchResults), ...nodeMatches(graph, question)],
6423
+ [...pageSearchMatches(graph, question, searchResults), ...nodeMatches(graph, question), ...hyperedgeMatches(graph, question)],
5784
6424
  (match) => `${match.type}:${match.id}`
5785
6425
  ).sort((left, right) => right.score - left.score || left.label.localeCompare(right.label)).slice(0, 12);
5786
6426
  const pages = pageById(graph);
5787
6427
  const seeds = uniqueBy(
5788
6428
  [
5789
6429
  ...searchResults.flatMap((result) => pages.get(result.pageId)?.nodeIds ?? []),
5790
- ...matches.filter((match) => match.type === "node").map((match) => match.id)
6430
+ ...matches.filter((match) => match.type === "node").map((match) => match.id),
6431
+ ...matches.filter((match) => match.type === "hyperedge").flatMap((match) => graph.hyperedges.find((hyperedge) => hyperedge.id === match.id)?.nodeIds ?? [])
5791
6432
  ],
5792
6433
  (item) => item
5793
6434
  ).filter(Boolean);
@@ -5828,6 +6469,10 @@ function queryGraph(graph, question, searchResults, options) {
5828
6469
  visitedNodeIds.map((nodeId) => nodes.get(nodeId)?.communityId).filter((communityId) => Boolean(communityId)),
5829
6470
  (item) => item
5830
6471
  );
6472
+ const hyperedgeIds = uniqueBy(
6473
+ (graph.hyperedges ?? []).filter((hyperedge) => hyperedge.nodeIds.some((nodeId) => visitedNodeIds.includes(nodeId))).map((hyperedge) => hyperedge.id),
6474
+ (item) => item
6475
+ );
5831
6476
  return {
5832
6477
  question,
5833
6478
  traversal,
@@ -5838,6 +6483,7 @@ function queryGraph(graph, question, searchResults, options) {
5838
6483
  ),
5839
6484
  visitedNodeIds,
5840
6485
  visitedEdgeIds: [...visitedEdgeIds],
6486
+ hyperedgeIds,
5841
6487
  pageIds,
5842
6488
  communities,
5843
6489
  matches,
@@ -5845,6 +6491,7 @@ function queryGraph(graph, question, searchResults, options) {
5845
6491
  `Seeds: ${seeds.join(", ") || "none"}`,
5846
6492
  `Visited nodes: ${visitedNodeIds.length}`,
5847
6493
  `Visited edges: ${visitedEdgeIds.size}`,
6494
+ `Touched group patterns: ${hyperedgeIds.length}`,
5848
6495
  `Communities: ${communities.join(", ") || "none"}`,
5849
6496
  `Pages: ${pageIds.join(", ") || "none"}`
5850
6497
  ].join("\n")
@@ -5964,11 +6611,13 @@ function explainGraphTarget(graph, target) {
5964
6611
  page,
5965
6612
  community: communityLabel(graph, node.communityId),
5966
6613
  neighbors,
6614
+ hyperedges: hyperedgesForNode(graph, node.id),
5967
6615
  summary: [
5968
6616
  `Node: ${node.label}`,
5969
6617
  `Type: ${node.type}`,
5970
6618
  `Community: ${node.communityId ?? "none"}`,
5971
6619
  `Neighbors: ${neighbors.length}`,
6620
+ `Group patterns: ${hyperedgesForNode(graph, node.id).length}`,
5972
6621
  `Page: ${page?.path ?? "none"}`
5973
6622
  ].join("\n")
5974
6623
  };
@@ -5976,14 +6625,31 @@ function explainGraphTarget(graph, target) {
5976
6625
  function topGodNodes(graph, limit = 10) {
5977
6626
  return graph.nodes.filter((node) => node.isGodNode).sort((left, right) => (right.degree ?? 0) - (left.degree ?? 0)).slice(0, limit);
5978
6627
  }
6628
/**
 * Lists hyperedges, optionally restricted to a node or page target.
 *
 * Without a target, all hyperedges are returned. With a target, it is first
 * resolved as a node (via resolveNode); failing that, as a page matched by
 * normalized path or exact id. A page matches a hyperedge when the page id is
 * among its `sourcePageIds` or the page and hyperedge share a node id.
 *
 * @param {{hyperedges?: Array, pages: Array}} graph - Graph to query.
 * @param {string} [target] - Node or page reference.
 * @param {number} [limit=25] - Maximum number of hyperedges returned.
 * @returns {Array<object>} Hyperedges by descending confidence, then label; empty when the target is unknown.
 */
function listHyperedges(graph, target, limit = 25) {
  const rank = (items) => items.sort((left, right) => right.confidence - left.confidence || left.label.localeCompare(right.label)).slice(0, limit);

  if (!target) {
    return rank([...graph.hyperedges ?? []]);
  }

  const node = resolveNode(graph, target);
  if (node) {
    return hyperedgesForNode(graph, node.id).slice(0, limit);
  }

  const page = graph.pages.find((candidate) => normalizeTarget(candidate.path) === normalizeTarget(target) || candidate.id === target);
  if (!page) {
    return [];
  }

  const touchesPage = (hyperedge) => hyperedge.sourcePageIds.includes(page.id) || page.nodeIds.some((nodeId) => hyperedge.nodeIds.includes(nodeId));
  return rank((graph.hyperedges ?? []).filter(touchesPage));
}
5979
6642
 
5980
6643
  // src/markdown.ts
5981
- import matter4 from "gray-matter";
5982
- function uniqueStrings(values) {
6644
+ import matter5 from "gray-matter";
6645
/**
 * Drops falsy entries and then de-duplicates the remaining values with uniqueBy().
 *
 * @param {Array<string|undefined|null>} values - Candidate strings.
 * @returns {string[]} Unique truthy values.
 */
function uniqueStrings2(values) {
  const present = values.filter((value) => Boolean(value));
  return uniqueBy(present, (value) => value);
}
6648
/**
 * Returns a JSON round-tripped copy of the frontmatter value.
 *
 * The round trip removes object properties whose value is `undefined`.
 * NOTE(review): presumably this is what makes the result safe to hand to the
 * YAML serializer — confirm against gray-matter's behaviour.
 *
 * @param {object} value - Frontmatter data.
 * @returns {object} Plain-data copy of `value`.
 */
function safeFrontmatter(value) {
  const serialized = JSON.stringify(value);
  return JSON.parse(serialized);
}
5985
6651
  function decoratedTags(baseTags, decorations) {
5986
- return uniqueStrings([
6652
+ return uniqueStrings2([
5987
6653
  ...baseTags,
5988
6654
  ...(decorations?.projectIds ?? []).map((projectId) => `project/${projectId}`),
5989
6655
  ...decorations?.extraTags ?? []
@@ -6062,6 +6728,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
6062
6728
  page_id: pageId,
6063
6729
  kind: "source",
6064
6730
  title: analysis.title,
6731
+ ...manifest.sourceType ? { source_type: manifest.sourceType } : {},
6065
6732
  tags: decoratedTags(analysis.code ? ["source", "code"] : ["source"], decorations),
6066
6733
  source_ids: [manifest.sourceId],
6067
6734
  project_ids: decorations?.projectIds ?? [],
@@ -6084,6 +6751,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
6084
6751
  "",
6085
6752
  `Source ID: \`${manifest.sourceId}\``,
6086
6753
  manifest.url ? `Source URL: ${manifest.url}` : `Source Path: \`${manifest.originalPath ?? manifest.storedPath}\``,
6754
+ ...manifest.sourceType ? [`Source Type: \`${manifest.sourceType}\``, ""] : [""],
6087
6755
  "",
6088
6756
  "## Summary",
6089
6757
  "",
@@ -6128,6 +6796,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
6128
6796
  path: relativePath,
6129
6797
  title: analysis.title,
6130
6798
  kind: "source",
6799
+ sourceType: manifest.sourceType,
6131
6800
  sourceIds: [manifest.sourceId],
6132
6801
  projectIds: decorations?.projectIds ?? [],
6133
6802
  nodeIds,
@@ -6145,7 +6814,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
6145
6814
  compiledFrom: metadata.compiledFrom,
6146
6815
  managedBy: metadata.managedBy
6147
6816
  },
6148
- content: matter4.stringify(body, frontmatter)
6817
+ content: matter5.stringify(body, safeFrontmatter(frontmatter))
6149
6818
  };
6150
6819
  }
6151
6820
  function buildModulePage(input) {
@@ -6160,7 +6829,7 @@ function buildModulePage(input) {
6160
6829
  const nodeIds = [code.moduleId, ...code.symbols.map((symbol) => symbol.id)];
6161
6830
  const localModuleBacklinks = input.localModules.map((moduleRef) => moduleRef.page.id);
6162
6831
  const relatedOutputs = input.relatedOutputs ?? [];
6163
- const backlinks = uniqueStrings([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]);
6832
+ const backlinks = uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]);
6164
6833
  const importsSection = code.imports.length ? code.imports.map((item) => {
6165
6834
  const localModule = item.resolvedSourceId ? input.localModules.find((moduleRef) => moduleRef.sourceId === item.resolvedSourceId && moduleRef.reExport === item.reExport) : void 0;
6166
6835
  const importedBits = [
@@ -6206,9 +6875,9 @@ function buildModulePage(input) {
6206
6875
  source_hashes: {
6207
6876
  [manifest.sourceId]: manifest.contentHash
6208
6877
  },
6209
- related_page_ids: uniqueStrings([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
6878
+ related_page_ids: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
6210
6879
  related_node_ids: [],
6211
- related_source_ids: uniqueStrings([
6880
+ related_source_ids: uniqueStrings2([
6212
6881
  manifest.sourceId,
6213
6882
  ...input.localModules.map((moduleRef) => moduleRef.sourceId),
6214
6883
  ...relatedOutputs.flatMap((page) => page.sourceIds)
@@ -6280,9 +6949,9 @@ function buildModulePage(input) {
6280
6949
  backlinks,
6281
6950
  schemaHash,
6282
6951
  sourceHashes: { [manifest.sourceId]: manifest.contentHash },
6283
- relatedPageIds: uniqueStrings([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
6952
+ relatedPageIds: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
6284
6953
  relatedNodeIds: [],
6285
- relatedSourceIds: uniqueStrings([
6954
+ relatedSourceIds: uniqueStrings2([
6286
6955
  manifest.sourceId,
6287
6956
  ...input.localModules.map((moduleRef) => moduleRef.sourceId),
6288
6957
  ...relatedOutputs.flatMap((page) => page.sourceIds)
@@ -6292,7 +6961,7 @@ function buildModulePage(input) {
6292
6961
  compiledFrom: metadata.compiledFrom,
6293
6962
  managedBy: metadata.managedBy
6294
6963
  },
6295
- content: matter4.stringify(body, frontmatter)
6964
+ content: matter5.stringify(body, frontmatter)
6296
6965
  };
6297
6966
  }
6298
6967
  function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes, schemaHash, metadata, relativePath, relatedOutputs = [], decorations) {
@@ -6363,7 +7032,7 @@ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHash
6363
7032
  compiledFrom: metadata.compiledFrom,
6364
7033
  managedBy: metadata.managedBy
6365
7034
  },
6366
- content: matter4.stringify(body, frontmatter)
7035
+ content: matter5.stringify(body, frontmatter)
6367
7036
  };
6368
7037
  }
6369
7038
  function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
@@ -6439,7 +7108,7 @@ function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
6439
7108
  }
6440
7109
  function buildSectionIndex(kind, pages, schemaHash, metadata, projectIds = []) {
6441
7110
  const title = kind.charAt(0).toUpperCase() + kind.slice(1);
6442
- return matter4.stringify(
7111
+ return matter5.stringify(
6443
7112
  [`# ${title}`, "", ...pages.map((page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]]`), ""].join("\n"),
6444
7113
  {
6445
7114
  page_id: `${kind}:index`,
@@ -6470,38 +7139,219 @@ function nodeSummary(node) {
6470
7139
  const bridge = typeof node.bridgeScore === "number" ? `bridge=${node.bridgeScore}` : "";
6471
7140
  return [node.type, degree, bridge].filter(Boolean).join(", ");
6472
7141
  }
6473
- function crossCommunityEdges(graph) {
7142
+ function sourceTypeForNode(node, pagesById) {
7143
+ if (!node?.pageId) {
7144
+ return void 0;
7145
+ }
7146
+ return pagesById.get(node.pageId)?.sourceType;
7147
+ }
7148
+ function supportingPathDetails(graph, edge) {
7149
+ const path23 = shortestGraphPath(graph, edge.source, edge.target);
7150
+ const edgesById = new Map(graph.edges.map((item) => [item.id, item]));
7151
+ const pathEdges = path23.edgeIds.map((edgeId) => edgesById.get(edgeId)).filter((item) => Boolean(item));
7152
+ return {
7153
+ pathNodeIds: path23.nodeIds,
7154
+ pathEdgeIds: path23.edgeIds,
7155
+ pathRelations: pathEdges.map((item) => item.relation),
7156
+ pathEvidenceClasses: pathEdges.map((item) => item.evidenceClass),
7157
+ pathSummary: path23.summary
7158
+ };
7159
+ }
7160
+ function surpriseScore(edge, graph, pagesById, hyperedgesByNodeId) {
6474
7161
  const nodesById = new Map(graph.nodes.map((node) => [node.id, node]));
6475
- return graph.edges.filter((edge) => {
6476
- const source = nodesById.get(edge.source);
6477
- const target = nodesById.get(edge.target);
6478
- return source?.communityId && target?.communityId && source.communityId !== target.communityId;
6479
- }).sort((left, right) => right.confidence - left.confidence || left.relation.localeCompare(right.relation));
7162
+ const source = nodesById.get(edge.source);
7163
+ const target = nodesById.get(edge.target);
7164
+ const reasons = [];
7165
+ let score = edge.confidence * 0.45;
7166
+ if (source?.communityId && target?.communityId && source.communityId !== target.communityId) {
7167
+ score += 0.18;
7168
+ reasons.push(`it crosses communities ${source.communityId} and ${target.communityId}`);
7169
+ }
7170
+ if (source?.pageId && target?.pageId && source.pageId !== target.pageId) {
7171
+ score += 0.12;
7172
+ reasons.push("it spans different canonical pages");
7173
+ }
7174
+ if (source?.type && target?.type && source.type !== target.type) {
7175
+ score += 0.08;
7176
+ reasons.push(`it bridges ${source.type} and ${target.type} nodes`);
7177
+ }
7178
+ const sourceType = sourceTypeForNode(source, pagesById);
7179
+ const targetType = sourceTypeForNode(target, pagesById);
7180
+ if (sourceType && targetType && sourceType !== targetType) {
7181
+ score += 0.07;
7182
+ reasons.push(`it crosses source types (${sourceType} and ${targetType})`);
7183
+ }
7184
+ if ((source?.bridgeScore ?? 0) > 0 || (target?.bridgeScore ?? 0) > 0) {
7185
+ score += 0.08;
7186
+ reasons.push("a bridge node is involved");
7187
+ }
7188
+ if (edge.relation === "semantically_similar_to") {
7189
+ score += 0.12;
7190
+ reasons.push(describeSimilarityReasons(edge.similarityReasons));
7191
+ }
7192
+ if (edge.evidenceClass === "ambiguous") {
7193
+ score += 0.08;
7194
+ reasons.push("the supporting evidence is ambiguous");
7195
+ }
7196
+ const overlappingHyperedges = (hyperedgesByNodeId.get(edge.source) ?? []).filter((hyperedge) => hyperedge.nodeIds.includes(edge.target));
7197
+ if (overlappingHyperedges.length) {
7198
+ score += 0.06;
7199
+ reasons.push(`it also appears in ${overlappingHyperedges.length} group pattern${overlappingHyperedges.length === 1 ? "" : "s"}`);
7200
+ }
7201
+ const why = normalizeWhitespace(reasons.join("; ")) || "it links graph regions that are otherwise weakly connected";
7202
+ const explanation = normalizeWhitespace(`${source?.label ?? edge.source} connects to ${target?.label ?? edge.target} because ${why}.`);
7203
+ return { score: Math.min(0.99, score), why, explanation };
7204
+ }
7205
+ function topSurprisingConnections(graph, pagesById) {
7206
+ const nodesById = new Map(graph.nodes.map((node) => [node.id, node]));
7207
+ const hyperedgesByNodeId = /* @__PURE__ */ new Map();
7208
+ for (const hyperedge of graph.hyperedges ?? []) {
7209
+ for (const nodeId of hyperedge.nodeIds) {
7210
+ if (!hyperedgesByNodeId.has(nodeId)) {
7211
+ hyperedgesByNodeId.set(nodeId, []);
7212
+ }
7213
+ hyperedgesByNodeId.get(nodeId)?.push(hyperedge);
7214
+ }
7215
+ }
7216
+ return uniqueBy(
7217
+ graph.edges.filter((edge) => {
7218
+ const source = nodesById.get(edge.source);
7219
+ const target = nodesById.get(edge.target);
7220
+ return Boolean(
7221
+ source?.communityId && target?.communityId && source.communityId !== target.communityId || edge.relation === "semantically_similar_to" || edge.evidenceClass === "ambiguous" || source?.type && target?.type && source.type !== target.type
7222
+ );
7223
+ }).map((edge) => {
7224
+ const source = nodesById.get(edge.source);
7225
+ const target = nodesById.get(edge.target);
7226
+ const path23 = supportingPathDetails(graph, edge);
7227
+ const scored = surpriseScore(edge, graph, pagesById, hyperedgesByNodeId);
7228
+ return {
7229
+ id: edge.id,
7230
+ sourceNodeId: edge.source,
7231
+ sourceLabel: source?.label ?? edge.source,
7232
+ targetNodeId: edge.target,
7233
+ targetLabel: target?.label ?? edge.target,
7234
+ relation: edge.relation,
7235
+ evidenceClass: edge.evidenceClass,
7236
+ confidence: edge.confidence,
7237
+ pathNodeIds: path23.pathNodeIds,
7238
+ pathEdgeIds: path23.pathEdgeIds,
7239
+ pathRelations: path23.pathRelations,
7240
+ pathEvidenceClasses: path23.pathEvidenceClasses,
7241
+ pathSummary: path23.pathSummary,
7242
+ why: scored.why,
7243
+ explanation: scored.explanation,
7244
+ surpriseScore: scored.score
7245
+ };
7246
+ }).sort(
7247
+ (left, right) => right.surpriseScore - left.surpriseScore || right.confidence - left.confidence || left.id.localeCompare(right.id)
7248
+ ).slice(0, 8),
7249
+ (connection) => connection.id
7250
+ ).map(({ surpriseScore: _surpriseScore, ...connection }) => connection);
7251
+ }
7252
+ function topGroupPatterns(graph) {
7253
+ return [...graph.hyperedges ?? []].sort(
7254
+ (left, right) => right.confidence - left.confidence || right.nodeIds.length - left.nodeIds.length || left.label.localeCompare(right.label)
7255
+ ).slice(0, 8);
6480
7256
  }
6481
7257
  function suggestedGraphQuestions(graph) {
6482
7258
  const thinCommunities = (graph.communities ?? []).filter((community) => community.nodeIds.length <= 2);
6483
7259
  const bridgeNodes = graph.nodes.filter((node) => (node.bridgeScore ?? 0) > 0).sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0)).slice(0, 3);
6484
- return uniqueStrings([
7260
+ return uniqueStrings2([
6485
7261
  ...thinCommunities.map((community) => `What sources would strengthen community ${community.label}?`),
6486
7262
  ...bridgeNodes.map((node) => `Why does ${node.label} connect multiple communities in the vault?`)
6487
7263
  ]).slice(0, 6);
6488
7264
  }
7265
+ function buildGraphReportArtifact(input) {
7266
+ const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
7267
+ const godNodes = input.graph.nodes.filter((node) => node.isGodNode).sort((left, right) => (right.degree ?? 0) - (left.degree ?? 0)).slice(0, 8);
7268
+ const bridgeNodes = input.graph.nodes.filter((node) => (node.bridgeScore ?? 0) > 0).sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0)).slice(0, 8);
7269
+ const thinCommunities = (input.graph.communities ?? []).filter((community) => community.nodeIds.length <= 2).map((community) => {
7270
+ const page = input.communityPages.find((candidate) => candidate.id === `graph:${community.id}`);
7271
+ return {
7272
+ id: community.id,
7273
+ label: community.label,
7274
+ nodeCount: community.nodeIds.length,
7275
+ pageId: page?.id,
7276
+ path: page?.path,
7277
+ title: page?.title
7278
+ };
7279
+ });
7280
+ const surprisingConnections = topSurprisingConnections(input.graph, pagesById);
7281
+ const groupPatterns = topGroupPatterns(input.graph);
7282
+ return {
7283
+ generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
7284
+ graphHash: input.graphHash,
7285
+ overview: {
7286
+ nodes: input.graph.nodes.length,
7287
+ edges: input.graph.edges.length,
7288
+ pages: input.graph.pages.length,
7289
+ communities: input.graph.communities?.length ?? 0
7290
+ },
7291
+ benchmark: input.benchmark ? {
7292
+ generatedAt: input.benchmark.generatedAt,
7293
+ stale: input.benchmarkStale ?? false,
7294
+ summary: input.benchmark.summary,
7295
+ questionCount: input.benchmark.sampleQuestions.length
7296
+ } : void 0,
7297
+ godNodes: godNodes.map((node) => ({
7298
+ nodeId: node.id,
7299
+ label: node.label,
7300
+ pageId: node.pageId,
7301
+ degree: node.degree,
7302
+ bridgeScore: node.bridgeScore
7303
+ })),
7304
+ bridgeNodes: bridgeNodes.map((node) => ({
7305
+ nodeId: node.id,
7306
+ label: node.label,
7307
+ pageId: node.pageId,
7308
+ degree: node.degree,
7309
+ bridgeScore: node.bridgeScore
7310
+ })),
7311
+ thinCommunities,
7312
+ surprisingConnections,
7313
+ groupPatterns,
7314
+ suggestedQuestions: suggestedGraphQuestions(input.graph),
7315
+ communityPages: input.communityPages.map((page) => ({
7316
+ id: page.id,
7317
+ path: page.path,
7318
+ title: page.title
7319
+ })),
7320
+ recentResearchSources: (input.recentResearchSources ?? []).map((page) => ({
7321
+ pageId: page.id,
7322
+ path: page.path,
7323
+ title: page.title,
7324
+ sourceType: page.sourceType,
7325
+ updatedAt: page.updatedAt
7326
+ }))
7327
+ };
7328
+ }
6489
7329
  function buildGraphReportPage(input) {
6490
7330
  const pageId = "graph:report";
6491
7331
  const pathValue = pagePathFor("graph_report", "report");
6492
7332
  const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
6493
7333
  const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
6494
- const godNodes = input.graph.nodes.filter((node) => node.isGodNode).sort((left, right) => (right.degree ?? 0) - (left.degree ?? 0)).slice(0, 8);
6495
- const bridgeNodes = input.graph.nodes.filter((node) => (node.bridgeScore ?? 0) > 0).sort((left, right) => (right.bridgeScore ?? 0) - (left.bridgeScore ?? 0)).slice(0, 8);
6496
- const surprisingEdges = crossCommunityEdges(input.graph).slice(0, 8);
6497
- const thinCommunities = (input.graph.communities ?? []).filter((community) => community.nodeIds.length <= 2);
6498
- const relatedNodeIds = uniqueStrings([...godNodes, ...bridgeNodes].map((node) => node.id));
6499
- const relatedPageIds = uniqueStrings([
6500
- ...godNodes.map((node) => node.pageId ?? ""),
6501
- ...bridgeNodes.map((node) => node.pageId ?? ""),
6502
- ...input.communityPages.map((page) => page.id)
7334
+ const relatedNodeIds = uniqueStrings2([
7335
+ ...input.report.godNodes.map((node) => node.nodeId),
7336
+ ...input.report.bridgeNodes.map((node) => node.nodeId),
7337
+ ...input.report.surprisingConnections.flatMap((connection) => [
7338
+ connection.sourceNodeId,
7339
+ connection.targetNodeId,
7340
+ ...connection.pathNodeIds
7341
+ ]),
7342
+ ...input.report.groupPatterns.flatMap((hyperedge) => hyperedge.nodeIds)
7343
+ ]);
7344
+ const relatedPageIds = uniqueStrings2([
7345
+ ...input.report.godNodes.map((node) => node.pageId ?? ""),
7346
+ ...input.report.bridgeNodes.map((node) => node.pageId ?? ""),
7347
+ ...input.report.communityPages.map((page) => page.id),
7348
+ ...input.report.recentResearchSources.map((page) => page.pageId),
7349
+ ...input.report.groupPatterns.flatMap((hyperedge) => hyperedge.sourcePageIds)
7350
+ ]);
7351
+ const relatedSourceIds = uniqueStrings2([
7352
+ ...relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []),
7353
+ ...input.report.recentResearchSources.flatMap((page) => pagesById.get(page.pageId)?.sourceIds ?? [])
6503
7354
  ]);
6504
- const relatedSourceIds = uniqueStrings(relatedNodeIds.flatMap((nodeId) => nodesById.get(nodeId)?.sourceIds ?? []));
6505
7355
  const frontmatter = {
6506
7356
  page_id: pageId,
6507
7357
  kind: "graph_report",
@@ -6529,47 +7379,73 @@ function buildGraphReportPage(input) {
6529
7379
  "",
6530
7380
  "## Overview",
6531
7381
  "",
6532
- `- Nodes: ${input.graph.nodes.length}`,
6533
- `- Edges: ${input.graph.edges.length}`,
6534
- `- Pages: ${input.graph.pages.length}`,
6535
- `- Communities: ${input.graph.communities?.length ?? 0}`,
7382
+ `- Nodes: ${input.report.overview.nodes}`,
7383
+ `- Edges: ${input.report.overview.edges}`,
7384
+ `- Pages: ${input.report.overview.pages}`,
7385
+ `- Communities: ${input.report.overview.communities}`,
6536
7386
  "",
6537
- ...input.benchmark ? [
6538
- "## Benchmark",
6539
- "",
6540
- `- Corpus Tokens: ${input.benchmark.corpusTokens}`,
6541
- `- Avg Query Tokens: ${input.benchmark.avgQueryTokens}`,
6542
- `- Reduction Ratio: ${(input.benchmark.reductionRatio * 100).toFixed(1)}%`,
6543
- `- Sample Questions: ${input.benchmark.sampleQuestions.length}`,
7387
+ "## Benchmark Summary",
7388
+ "",
7389
+ ...input.report.benchmark ? [
7390
+ `- Generated At: ${input.report.benchmark.generatedAt}`,
7391
+ `- Status: ${input.report.benchmark.stale ? "Stale (graph changed since benchmark ran)" : "Fresh"}`,
7392
+ `- Naive Corpus Tokens: ${input.report.benchmark.summary.naiveCorpusTokens}`,
7393
+ `- Final Context Tokens: ${input.report.benchmark.summary.finalContextTokens}`,
7394
+ `- Unique Nodes Considered: ${input.report.benchmark.summary.uniqueVisitedNodes}`,
7395
+ `- Reduction Ratio: ${(input.report.benchmark.summary.reductionRatio * 100).toFixed(1)}%`,
7396
+ `- Questions: ${input.report.benchmark.questionCount}`,
6544
7397
  ""
6545
- ] : [],
6546
- "## God Nodes",
7398
+ ] : ["- No benchmark results yet.", ""],
7399
+ "## Top God Nodes",
6547
7400
  "",
6548
- ...godNodes.length ? godNodes.map((node) => `- ${graphNodeLink(node, pagesById)} (${nodeSummary(node)})`) : ["- No high-connectivity nodes detected."],
7401
+ ...input.report.godNodes.length ? input.report.godNodes.map((node) => {
7402
+ const graphNode = nodesById.get(node.nodeId);
7403
+ return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
7404
+ }) : ["- No high-connectivity nodes detected."],
6549
7405
  "",
6550
- "## Bridge Nodes",
7406
+ "## Top Bridge Nodes",
6551
7407
  "",
6552
- ...bridgeNodes.length ? bridgeNodes.map((node) => `- ${graphNodeLink(node, pagesById)} (${nodeSummary(node)})`) : ["- No cross-community bridge nodes detected."],
7408
+ ...input.report.bridgeNodes.length ? input.report.bridgeNodes.map((node) => {
7409
+ const graphNode = nodesById.get(node.nodeId);
7410
+ return graphNode ? `- ${graphNodeLink(graphNode, pagesById)} (${nodeSummary(graphNode)})` : `- \`${node.nodeId}\``;
7411
+ }) : ["- No cross-community bridge nodes detected."],
6553
7412
  "",
6554
7413
  "## Communities",
6555
7414
  "",
6556
- ...input.communityPages.length ? input.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
7415
+ ...input.report.communityPages.length ? input.report.communityPages.map((page) => `- ${pageLink(page)}`) : ["- No community summaries generated yet."],
6557
7416
  "",
6558
- "## Thin Communities",
7417
+ "## Thin Or Underlinked Areas",
6559
7418
  "",
6560
- ...thinCommunities.length ? thinCommunities.map((community) => `- ${community.label} (${community.nodeIds.length} node(s))`) : ["- No thin communities detected."],
7419
+ ...input.report.thinCommunities.length ? input.report.thinCommunities.map(
7420
+ (community) => community.path ? `- [[${community.path.replace(/\.md$/, "")}|${community.title ?? community.label}]] (${community.nodeCount} node(s))` : `- ${community.label} (${community.nodeCount} node(s))`
7421
+ ) : ["- No thin communities detected."],
6561
7422
  "",
6562
- "## Cross-Community Surprises",
7423
+ "## Surprising Connections",
6563
7424
  "",
6564
- ...surprisingEdges.length ? surprisingEdges.map((edge) => {
6565
- const source = nodesById.get(edge.source);
6566
- const target = nodesById.get(edge.target);
6567
- return `- ${source ? graphNodeLink(source, pagesById) : `\`${edge.source}\``} ${edge.relation} ${target ? graphNodeLink(target, pagesById) : `\`${edge.target}\``} (${edge.evidenceClass}, ${edge.confidence.toFixed(2)})`;
7425
+ ...input.report.surprisingConnections.length ? input.report.surprisingConnections.map((connection) => {
7426
+ const source = nodesById.get(connection.sourceNodeId);
7427
+ const target = nodesById.get(connection.targetNodeId);
7428
+ const sourceLabel = source ? graphNodeLink(source, pagesById) : `\`${connection.sourceNodeId}\``;
7429
+ const targetLabel = target ? graphNodeLink(target, pagesById) : `\`${connection.targetNodeId}\``;
7430
+ return `- ${sourceLabel} ${connection.relation} ${targetLabel} (${connection.evidenceClass}, ${connection.confidence.toFixed(2)}). Why: ${connection.why}. ${connection.explanation} Path: ${connection.pathSummary}.`;
6568
7431
  }) : ["- No cross-community links detected."],
6569
7432
  "",
6570
- "## Suggested Follow-Up Questions",
7433
+ "## Group Patterns",
7434
+ "",
7435
+ ...input.report.groupPatterns.length ? input.report.groupPatterns.map((hyperedge) => {
7436
+ const linkedNodes = hyperedge.nodeIds.map((nodeId) => nodesById.get(nodeId)).filter((node) => Boolean(node)).map((node) => graphNodeLink(node, pagesById)).join(", ");
7437
+ return `- ${hyperedge.label} (${hyperedge.relation}, ${hyperedge.evidenceClass}, ${hyperedge.confidence.toFixed(2)}). ${hyperedge.why} Members: ${linkedNodes}.`;
7438
+ }) : ["- No multi-node group patterns detected."],
7439
+ "",
7440
+ "## New Research Sources",
7441
+ "",
7442
+ ...input.report.recentResearchSources.length ? input.report.recentResearchSources.map(
7443
+ (page) => `- [[${page.path.replace(/\.md$/, "")}|${page.title}]] (\`${page.sourceType}\`, updated ${page.updatedAt})`
7444
+ ) : ["- No newly captured research sources since the previous compile."],
7445
+ "",
7446
+ "## Suggested Questions",
6571
7447
  "",
6572
- ...suggestedGraphQuestions(input.graph).map((question) => `- ${question}`),
7448
+ ...input.report.suggestedQuestions.map((question) => `- ${question}`),
6573
7449
  ""
6574
7450
  ].join("\n");
6575
7451
  return {
@@ -6595,7 +7471,7 @@ function buildGraphReportPage(input) {
6595
7471
  compiledFrom: input.metadata.compiledFrom,
6596
7472
  managedBy: input.metadata.managedBy
6597
7473
  },
6598
- content: matter4.stringify(body, frontmatter)
7474
+ content: matter5.stringify(body, frontmatter)
6599
7475
  };
6600
7476
  }
6601
7477
  function buildCommunitySummaryPage(input) {
@@ -6604,14 +7480,14 @@ function buildCommunitySummaryPage(input) {
6604
7480
  const nodesById = new Map(input.graph.nodes.map((node) => [node.id, node]));
6605
7481
  const pagesById = new Map(input.graph.pages.map((page) => [page.id, page]));
6606
7482
  const communityNodes = input.community.nodeIds.map((nodeId) => nodesById.get(nodeId)).filter((node) => Boolean(node));
6607
- const communityPageIds = uniqueStrings(communityNodes.map((node) => node.pageId ?? ""));
7483
+ const communityPageIds = uniqueStrings2(communityNodes.map((node) => node.pageId ?? ""));
6608
7484
  const communityPages = communityPageIds.map((id) => pagesById.get(id)).filter((page) => Boolean(page));
6609
7485
  const externalEdges = input.graph.edges.filter((edge) => {
6610
7486
  const source = nodesById.get(edge.source);
6611
7487
  const target = nodesById.get(edge.target);
6612
7488
  return source?.communityId === input.community.id && target?.communityId && target.communityId !== input.community.id;
6613
7489
  }).slice(0, 8);
6614
- const relatedSourceIds = uniqueStrings(communityNodes.flatMap((node) => node.sourceIds));
7490
+ const relatedSourceIds = uniqueStrings2(communityNodes.flatMap((node) => node.sourceIds));
6615
7491
  const frontmatter = {
6616
7492
  page_id: pageId,
6617
7493
  kind: "community_summary",
@@ -6630,7 +7506,7 @@ function buildCommunitySummaryPage(input) {
6630
7506
  backlinks: ["graph:report"],
6631
7507
  schema_hash: input.schemaHash,
6632
7508
  source_hashes: {},
6633
- related_page_ids: uniqueStrings(["graph:report", ...communityPageIds]),
7509
+ related_page_ids: uniqueStrings2(["graph:report", ...communityPageIds]),
6634
7510
  related_node_ids: input.community.nodeIds,
6635
7511
  related_source_ids: relatedSourceIds
6636
7512
  };
@@ -6669,7 +7545,7 @@ function buildCommunitySummaryPage(input) {
6669
7545
  backlinks: ["graph:report"],
6670
7546
  schemaHash: input.schemaHash,
6671
7547
  sourceHashes: {},
6672
- relatedPageIds: uniqueStrings(["graph:report", ...communityPageIds]),
7548
+ relatedPageIds: uniqueStrings2(["graph:report", ...communityPageIds]),
6673
7549
  relatedNodeIds: input.community.nodeIds,
6674
7550
  relatedSourceIds,
6675
7551
  createdAt: input.metadata.createdAt,
@@ -6677,11 +7553,11 @@ function buildCommunitySummaryPage(input) {
6677
7553
  compiledFrom: input.metadata.compiledFrom,
6678
7554
  managedBy: input.metadata.managedBy
6679
7555
  },
6680
- content: matter4.stringify(body, frontmatter)
7556
+ content: matter5.stringify(body, frontmatter)
6681
7557
  };
6682
7558
  }
6683
7559
  function buildProjectsIndex(projectPages, schemaHash, metadata) {
6684
- return matter4.stringify(
7560
+ return matter5.stringify(
6685
7561
  [
6686
7562
  "# Projects",
6687
7563
  "",
@@ -6711,7 +7587,7 @@ function buildProjectsIndex(projectPages, schemaHash, metadata) {
6711
7587
  }
6712
7588
  function buildProjectIndex(input) {
6713
7589
  const title = `Project: ${input.projectId}`;
6714
- return matter4.stringify(
7590
+ return matter5.stringify(
6715
7591
  [
6716
7592
  `# ${title}`,
6717
7593
  "",
@@ -6824,7 +7700,7 @@ function buildOutputPage(input) {
6824
7700
  outputFormat: input.outputFormat,
6825
7701
  outputAssets
6826
7702
  },
6827
- content: matter4.stringify(
7703
+ content: matter5.stringify(
6828
7704
  (input.outputFormat === "slides" ? [
6829
7705
  input.answer,
6830
7706
  "",
@@ -6950,7 +7826,7 @@ function buildExploreHubPage(input) {
6950
7826
  outputFormat: input.outputFormat,
6951
7827
  outputAssets
6952
7828
  },
6953
- content: matter4.stringify(
7829
+ content: matter5.stringify(
6954
7830
  (input.outputFormat === "slides" ? [
6955
7831
  `# ${title}`,
6956
7832
  "",
@@ -7216,12 +8092,12 @@ function buildOutputAssetManifest(input) {
7216
8092
  // src/outputs.ts
7217
8093
  import fs13 from "fs/promises";
7218
8094
  import path16 from "path";
7219
- import matter6 from "gray-matter";
8095
+ import matter7 from "gray-matter";
7220
8096
 
7221
8097
  // src/pages.ts
7222
8098
  import fs12 from "fs/promises";
7223
8099
  import path15 from "path";
7224
- import matter5 from "gray-matter";
8100
+ import matter6 from "gray-matter";
7225
8101
  function normalizeStringArray(value) {
7226
8102
  return Array.isArray(value) ? value.filter((item) => typeof item === "string") : [];
7227
8103
  }
@@ -7242,6 +8118,9 @@ function normalizePageStatus(value, fallback = "active") {
7242
8118
  function normalizePageManager(value, fallback = "system") {
7243
8119
  return value === "human" || value === "system" ? value : fallback;
7244
8120
  }
8121
+ function normalizeSourceType(value) {
8122
+ return value === "arxiv" || value === "doi" || value === "tweet" || value === "article" || value === "url" ? value : void 0;
8123
+ }
7245
8124
  function normalizeOutputFormat(value, fallback = "markdown") {
7246
8125
  return value === "report" || value === "slides" || value === "chart" || value === "image" ? value : fallback;
7247
8126
  }
@@ -7293,7 +8172,7 @@ async function loadExistingManagedPageState(absolutePath, defaults = {}) {
7293
8172
  };
7294
8173
  }
7295
8174
  const content = await fs12.readFile(absolutePath, "utf8");
7296
- const parsed = matter5(content);
8175
+ const parsed = matter6(content);
7297
8176
  return {
7298
8177
  status: normalizePageStatus(parsed.data.status, defaults.status ?? "active"),
7299
8178
  managedBy: normalizePageManager(parsed.data.managed_by, defaults.managedBy ?? "system"),
@@ -7327,7 +8206,7 @@ function inferPageKind(relativePath, explicitKind = void 0) {
7327
8206
  return "index";
7328
8207
  }
7329
8208
  function parseStoredPage(relativePath, content, defaults = {}) {
7330
- const parsed = matter5(content);
8209
+ const parsed = matter6(content);
7331
8210
  const now = (/* @__PURE__ */ new Date()).toISOString();
7332
8211
  const fallbackCreatedAt = defaults.createdAt ?? now;
7333
8212
  const fallbackUpdatedAt = defaults.updatedAt ?? fallbackCreatedAt;
@@ -7347,6 +8226,7 @@ function parseStoredPage(relativePath, content, defaults = {}) {
7347
8226
  path: relativePath,
7348
8227
  title,
7349
8228
  kind,
8229
+ sourceType: normalizeSourceType(parsed.data.source_type),
7350
8230
  sourceIds,
7351
8231
  projectIds,
7352
8232
  nodeIds,
@@ -7379,7 +8259,7 @@ async function loadInsightPages(wikiDir) {
7379
8259
  for (const absolutePath of files) {
7380
8260
  const relativePath = toPosix(path15.relative(wikiDir, absolutePath));
7381
8261
  const content = await fs12.readFile(absolutePath, "utf8");
7382
- const parsed = matter5(content);
8262
+ const parsed = matter6(content);
7383
8263
  const stats = await fs12.stat(absolutePath);
7384
8264
  const title = typeof parsed.data.title === "string" ? parsed.data.title : path15.basename(absolutePath, ".md");
7385
8265
  const sourceIds = normalizeStringArray(parsed.data.source_ids);
@@ -7464,7 +8344,7 @@ async function loadSavedOutputPages(wikiDir) {
7464
8344
  const relativePath = path16.posix.join("outputs", entry.name);
7465
8345
  const absolutePath = path16.join(outputsDir, entry.name);
7466
8346
  const content = await fs13.readFile(absolutePath, "utf8");
7467
- const parsed = matter6(content);
8347
+ const parsed = matter7(content);
7468
8348
  const slug = entry.name.replace(/\.md$/, "");
7469
8349
  const title = typeof parsed.data.title === "string" ? parsed.data.title : slug;
7470
8350
  const pageId = typeof parsed.data.page_id === "string" ? parsed.data.page_id : `output:${slug}`;
@@ -7516,7 +8396,7 @@ async function loadSavedOutputPages(wikiDir) {
7516
8396
  // src/search.ts
7517
8397
  import fs14 from "fs/promises";
7518
8398
  import path17 from "path";
7519
- import matter7 from "gray-matter";
8399
+ import matter8 from "gray-matter";
7520
8400
  function getDatabaseSync() {
7521
8401
  const builtin = process.getBuiltinModule?.("node:sqlite");
7522
8402
  if (!builtin?.DatabaseSync) {
@@ -7534,6 +8414,9 @@ function normalizeKind(value) {
7534
8414
  function normalizeStatus(value) {
7535
8415
  return value === "draft" || value === "candidate" || value === "active" || value === "archived" ? value : void 0;
7536
8416
  }
8417
+ function normalizeSourceType2(value) {
8418
+ return value === "arxiv" || value === "doi" || value === "tweet" || value === "article" || value === "url" ? value : void 0;
8419
+ }
7537
8420
  async function rebuildSearchIndex(dbPath, pages, wikiDir) {
7538
8421
  await ensureDir(path17.dirname(dbPath));
7539
8422
  const DatabaseSync = getDatabaseSync();
@@ -7549,6 +8432,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
7549
8432
  body TEXT NOT NULL,
7550
8433
  kind TEXT NOT NULL,
7551
8434
  status TEXT NOT NULL,
8435
+ source_type TEXT NOT NULL,
7552
8436
  project_ids TEXT NOT NULL,
7553
8437
  project_key TEXT NOT NULL
7554
8438
  );
@@ -7562,12 +8446,12 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
7562
8446
  DELETE FROM pages;
7563
8447
  `);
7564
8448
  const insertPage = db.prepare(
7565
- "INSERT INTO pages (id, path, title, body, kind, status, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"
8449
+ "INSERT INTO pages (id, path, title, body, kind, status, source_type, project_ids, project_key) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
7566
8450
  );
7567
8451
  for (const page of pages) {
7568
8452
  const absolutePath = path17.join(wikiDir, page.path);
7569
8453
  const content = await fs14.readFile(absolutePath, "utf8");
7570
- const parsed = matter7(content);
8454
+ const parsed = matter8(content);
7571
8455
  insertPage.run(
7572
8456
  page.id,
7573
8457
  page.path,
@@ -7575,6 +8459,7 @@ async function rebuildSearchIndex(dbPath, pages, wikiDir) {
7575
8459
  parsed.content,
7576
8460
  page.kind,
7577
8461
  page.status,
8462
+ typeof parsed.data.source_type === "string" ? parsed.data.source_type : "",
7578
8463
  JSON.stringify(page.projectIds),
7579
8464
  page.projectIds.map((projectId) => `|${projectId}|`).join("")
7580
8465
  );
@@ -7608,6 +8493,10 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
7608
8493
  params.push(`%|${options.project}|%`);
7609
8494
  }
7610
8495
  }
8496
+ if (options.sourceType && options.sourceType !== "all") {
8497
+ clauses.push("pages.source_type = ?");
8498
+ params.push(options.sourceType);
8499
+ }
7611
8500
  const statement = db.prepare(`
7612
8501
  SELECT
7613
8502
  pages.id AS pageId,
@@ -7615,6 +8504,7 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
7615
8504
  pages.title AS title,
7616
8505
  pages.kind AS kind,
7617
8506
  pages.status AS status,
8507
+ pages.source_type AS sourceType,
7618
8508
  pages.project_ids AS projectIds,
7619
8509
  snippet(page_search, 1, '[', ']', '...', 16) AS snippet,
7620
8510
  bm25(page_search) AS rank
@@ -7642,13 +8532,14 @@ function searchPages(dbPath, query, limitOrOptions = 5) {
7642
8532
  title: String(row.title ?? ""),
7643
8533
  kind: normalizeKind(row.kind),
7644
8534
  status: normalizeStatus(row.status),
8535
+ sourceType: normalizeSourceType2(row.sourceType),
7645
8536
  snippet: String(row.snippet ?? ""),
7646
8537
  rank: Number(row.rank ?? 0)
7647
8538
  }));
7648
8539
  }
7649
8540
 
7650
8541
  // src/vault.ts
7651
- function uniqueStrings2(values) {
8542
+ function uniqueStrings3(values) {
7652
8543
  return uniqueBy(values.filter(Boolean), (value) => value);
7653
8544
  }
7654
8545
  function normalizeOutputFormat2(format) {
@@ -7809,7 +8700,7 @@ async function resolveImageGenerationProvider(rootDir) {
7809
8700
  if (!providerConfig) {
7810
8701
  throw new Error(`No provider configured with id "${preferredProviderId}" for task "imageProvider".`);
7811
8702
  }
7812
- const { createProvider: createProvider2 } = await import("./registry-X5PMZTZY.js");
8703
+ const { createProvider: createProvider2 } = await import("./registry-6KZMA3XM.js");
7813
8704
  return createProvider2(preferredProviderId, providerConfig, rootDir);
7814
8705
  }
7815
8706
  async function generateOutputArtifacts(rootDir, input) {
@@ -8013,7 +8904,7 @@ function normalizeProjectRoot(root) {
8013
8904
  function projectEntries(config) {
8014
8905
  return Object.entries(config.projects ?? {}).map(([id, project]) => ({
8015
8906
  id,
8016
- roots: uniqueStrings2(project.roots.map(normalizeProjectRoot)).filter(Boolean),
8907
+ roots: uniqueStrings3(project.roots.map(normalizeProjectRoot)).filter(Boolean),
8017
8908
  schemaPath: project.schemaPath
8018
8909
  })).sort((left, right) => left.id.localeCompare(right.id));
8019
8910
  }
@@ -8061,11 +8952,11 @@ function resolveSourceProjects(rootDir, manifests, config) {
8061
8952
  return Object.fromEntries(manifests.map((manifest) => [manifest.sourceId, resolveSourceProjectId(rootDir, manifest, config)]));
8062
8953
  }
8063
8954
  function scopedProjectIdsFromSources(sourceIds, sourceProjects) {
8064
- const projectIds = uniqueStrings2(sourceIds.map((sourceId) => sourceProjects[sourceId] ?? "").filter(Boolean));
8955
+ const projectIds = uniqueStrings3(sourceIds.map((sourceId) => sourceProjects[sourceId] ?? "").filter(Boolean));
8065
8956
  return projectIds.length === 1 ? projectIds : [];
8066
8957
  }
8067
8958
  function schemaProjectIdsFromPages(pageIds, pageMap2) {
8068
- return uniqueStrings2(
8959
+ return uniqueStrings3(
8069
8960
  pageIds.flatMap((pageId) => pageMap2.get(pageId)?.projectIds ?? []).filter(Boolean).sort((left, right) => left.localeCompare(right))
8070
8961
  );
8071
8962
  }
@@ -8074,7 +8965,7 @@ function categoryTagsForSchema(schema, texts) {
8074
8965
  if (!haystack) {
8075
8966
  return [];
8076
8967
  }
8077
- return uniqueStrings2(
8968
+ return uniqueStrings3(
8078
8969
  schemaCategoryLabels({ path: "", hash: "", content: schema.content }).filter((label) => haystack.includes(label.toLowerCase())).map((label) => `category/${slugify(label)}`)
8079
8970
  ).slice(0, 3);
8080
8971
  }
@@ -8285,7 +9176,7 @@ async function buildManagedContent(absolutePath, defaults, build) {
8285
9176
  return content;
8286
9177
  }
8287
9178
  function indexCompiledFrom(pages) {
8288
- return uniqueStrings2(pages.flatMap((page) => page.sourceIds));
9179
+ return uniqueStrings3(pages.flatMap((page) => page.sourceIds));
8289
9180
  }
8290
9181
  function deriveGraphMetrics(nodes, edges) {
8291
9182
  const adjacency = /* @__PURE__ */ new Map();
@@ -8678,17 +9569,42 @@ function buildGraph(manifests, analyses, pages, sourceProjects, _codeIndex) {
8678
9569
  ...conceptMap.values(),
8679
9570
  ...entityMap.values()
8680
9571
  ];
8681
- const metrics = deriveGraphMetrics(graphNodes, edges);
9572
+ const enriched = enrichGraph(
9573
+ {
9574
+ generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
9575
+ nodes: graphNodes,
9576
+ edges,
9577
+ communities: [],
9578
+ sources: manifests,
9579
+ pages
9580
+ },
9581
+ manifests,
9582
+ analyses
9583
+ );
9584
+ const metrics = deriveGraphMetrics(graphNodes, enriched.edges);
8682
9585
  return {
8683
9586
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
8684
9587
  nodes: metrics.nodes,
8685
- edges,
9588
+ edges: enriched.edges,
9589
+ hyperedges: enriched.hyperedges,
8686
9590
  communities: metrics.communities,
8687
9591
  sources: manifests,
8688
9592
  pages
8689
9593
  };
8690
9594
  }
8691
- async function buildGraphOrientationPages(graph, paths, schemaHash) {
9595
+ function recentResearchSourcePages(graph, previousCompiledAt) {
9596
+ const previousTimestamp = previousCompiledAt ? Date.parse(previousCompiledAt) : Number.NaN;
9597
+ return graph.pages.filter(
9598
+ (page) => page.kind === "source" && Boolean(page.sourceType) && page.sourceType !== "url"
9599
+ ).filter((page) => Number.isNaN(previousTimestamp) || Date.parse(page.updatedAt) > previousTimestamp).sort((left, right) => right.updatedAt.localeCompare(left.updatedAt) || left.title.localeCompare(right.title)).slice(0, 8).map((page) => ({
9600
+ id: page.id,
9601
+ path: page.path,
9602
+ title: page.title,
9603
+ updatedAt: page.updatedAt,
9604
+ sourceType: page.sourceType
9605
+ }));
9606
+ }
9607
+ async function buildGraphOrientationPages(graph, paths, schemaHash, previousCompiledAt) {
8692
9608
  const benchmark = await readJsonFile(paths.benchmarkPath);
8693
9609
  const communityRecords = [];
8694
9610
  for (const community of graph.communities ?? []) {
@@ -8698,7 +9614,7 @@ async function buildGraphOrientationPages(graph, paths, schemaHash) {
8698
9614
  absolutePath,
8699
9615
  {
8700
9616
  managedBy: "system",
8701
- compiledFrom: uniqueStrings2(
9617
+ compiledFrom: uniqueStrings3(
8702
9618
  community.nodeIds.flatMap((nodeId) => graph.nodes.find((node) => node.id === nodeId)?.sourceIds ?? [])
8703
9619
  ),
8704
9620
  confidence: 1
@@ -8712,23 +9628,33 @@ async function buildGraphOrientationPages(graph, paths, schemaHash) {
8712
9628
  )
8713
9629
  );
8714
9630
  }
9631
+ const report = buildGraphReportArtifact({
9632
+ graph,
9633
+ communityPages: communityRecords.map((record) => record.page),
9634
+ benchmark,
9635
+ benchmarkStale: benchmark ? benchmark.graphHash !== graphHash(graph) : false,
9636
+ recentResearchSources: recentResearchSourcePages(graph, previousCompiledAt),
9637
+ graphHash: graphHash(graph)
9638
+ });
8715
9639
  const reportAbsolutePath = path18.join(paths.wikiDir, "graph", "report.md");
8716
9640
  const reportRecord = await buildManagedGraphPage(
8717
9641
  reportAbsolutePath,
8718
9642
  {
8719
9643
  managedBy: "system",
8720
- compiledFrom: uniqueStrings2(graph.pages.flatMap((page) => page.sourceIds)),
9644
+ compiledFrom: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
8721
9645
  confidence: 1
8722
9646
  },
8723
9647
  (metadata) => buildGraphReportPage({
8724
9648
  graph,
8725
9649
  schemaHash,
8726
9650
  metadata,
8727
- communityPages: communityRecords.map((record) => record.page),
8728
- benchmark
9651
+ report
8729
9652
  })
8730
9653
  );
8731
- return [reportRecord, ...communityRecords];
9654
+ return {
9655
+ records: [reportRecord, ...communityRecords],
9656
+ report
9657
+ };
8732
9658
  }
8733
9659
  async function writePage(wikiDir, relativePath, content, changedPages) {
8734
9660
  const absolutePath = path18.resolve(wikiDir, relativePath);
@@ -9025,7 +9951,7 @@ async function syncVaultArtifacts(rootDir, input) {
9025
9951
  const itemKind = kind === "concepts" ? "concept" : "entity";
9026
9952
  const slug = slugify(aggregate.name);
9027
9953
  const pageId = `${itemKind}:${slug}`;
9028
- const sourceIds = uniqueStrings2(aggregate.sourceAnalyses.map((item) => item.sourceId));
9954
+ const sourceIds = uniqueStrings3(aggregate.sourceAnalyses.map((item) => item.sourceId));
9029
9955
  const projectIds = scopedProjectIdsFromSources(sourceIds, input.sourceProjects);
9030
9956
  const schemaHash = effectiveHashForProject(input.schemas, projectIds[0] ?? null);
9031
9957
  const previousEntry = input.previousState?.candidateHistory?.[pageId];
@@ -9091,9 +10017,9 @@ async function syncVaultArtifacts(rootDir, input) {
9091
10017
  const compiledPages = records.map((record) => record.page);
9092
10018
  const basePages = [...compiledPages, ...input.outputPages, ...input.insightPages];
9093
10019
  const baseGraph = buildGraph(input.manifests, input.analyses, basePages, input.sourceProjects, input.codeIndex);
9094
- const graphOrientationRecords = await buildGraphOrientationPages(baseGraph, paths, globalSchemaHash);
9095
- records.push(...graphOrientationRecords);
9096
- const allPages = [...basePages, ...graphOrientationRecords.map((record) => record.page)];
10020
+ const graphOrientation = await buildGraphOrientationPages(baseGraph, paths, globalSchemaHash, input.previousState?.generatedAt);
10021
+ records.push(...graphOrientation.records);
10022
+ const allPages = [...basePages, ...graphOrientation.records.map((record) => record.page)];
9097
10023
  const graph = {
9098
10024
  ...baseGraph,
9099
10025
  pages: allPages
@@ -9226,7 +10152,7 @@ async function syncVaultArtifacts(rootDir, input) {
9226
10152
  const nextPagePaths = new Set(records.map((record) => record.page.path));
9227
10153
  const obsoleteGraphPaths = (previousGraph?.pages ?? []).filter((page) => page.kind !== "output" && page.kind !== "insight").map((page) => page.path).filter((relativePath) => !nextPagePaths.has(relativePath));
9228
10154
  const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath))).filter((relativePath) => !nextPagePaths.has(relativePath));
9229
- const obsoletePaths = uniqueStrings2([...obsoleteGraphPaths, ...existingProjectIndexPaths]);
10155
+ const obsoletePaths = uniqueStrings3([...obsoleteGraphPaths, ...existingProjectIndexPaths]);
9230
10156
  const changedFiles = [];
9231
10157
  for (const record of records) {
9232
10158
  const absolutePath = path18.join(paths.wikiDir, record.page.path);
@@ -9258,6 +10184,7 @@ async function syncVaultArtifacts(rootDir, input) {
9258
10184
  await fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true });
9259
10185
  }
9260
10186
  await writeJsonFile(paths.graphPath, graph);
10187
+ await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
9261
10188
  await writeJsonFile(paths.codeIndexPath, input.codeIndex);
9262
10189
  await writeJsonFile(paths.compileStatePath, {
9263
10190
  generatedAt: graph.generatedAt,
@@ -9283,7 +10210,7 @@ async function syncVaultArtifacts(rootDir, input) {
9283
10210
  return {
9284
10211
  graph,
9285
10212
  allPages,
9286
- changedPages: uniqueStrings2([...changedPages, ...writeChanges]),
10213
+ changedPages: uniqueStrings3([...changedPages, ...writeChanges]),
9287
10214
  promotedPageIds,
9288
10215
  candidatePageCount: candidatePages.length,
9289
10216
  staged: false
@@ -9292,18 +10219,20 @@ async function syncVaultArtifacts(rootDir, input) {
9292
10219
  async function refreshIndexesAndSearch(rootDir, pages) {
9293
10220
  const { config, paths } = await loadVaultConfig(rootDir);
9294
10221
  const schemas = await loadVaultSchemas(rootDir);
10222
+ const compileState = await readJsonFile(paths.compileStatePath);
9295
10223
  const globalSchemaHash = schemas.effective.global.hash;
9296
10224
  const currentGraph = await readJsonFile(paths.graphPath);
9297
10225
  const basePages = pages.filter((page) => page.kind !== "graph_report" && page.kind !== "community_summary");
9298
- const graphOrientationRecords = currentGraph ? await buildGraphOrientationPages(
10226
+ const graphOrientation = currentGraph ? await buildGraphOrientationPages(
9299
10227
  {
9300
10228
  ...currentGraph,
9301
10229
  pages: basePages
9302
10230
  },
9303
10231
  paths,
9304
- globalSchemaHash
9305
- ) : [];
9306
- const pagesWithGraph = sortGraphPages([...basePages, ...graphOrientationRecords.map((record) => record.page)]);
10232
+ globalSchemaHash,
10233
+ compileState?.generatedAt
10234
+ ) : { records: [], report: null };
10235
+ const pagesWithGraph = sortGraphPages([...basePages, ...graphOrientation.records.map((record) => record.page)]);
9307
10236
  if (currentGraph) {
9308
10237
  await writeJsonFile(paths.graphPath, {
9309
10238
  ...currentGraph,
@@ -9409,9 +10338,12 @@ async function refreshIndexesAndSearch(rootDir, pages) {
9409
10338
  )
9410
10339
  );
9411
10340
  }
9412
- for (const record of graphOrientationRecords) {
10341
+ for (const record of graphOrientation.records) {
9413
10342
  await writeFileIfChanged(path18.join(paths.wikiDir, record.page.path), record.content);
9414
10343
  }
10344
+ if (graphOrientation.report) {
10345
+ await writeJsonFile(path18.join(paths.wikiDir, "graph", "report.json"), graphOrientation.report);
10346
+ }
9415
10347
  const existingProjectIndexPaths = (await listFilesRecursive(paths.projectsDir)).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
9416
10348
  const allowedProjectIndexPaths = /* @__PURE__ */ new Set([
9417
10349
  "projects/index.md",
@@ -9421,7 +10353,7 @@ async function refreshIndexesAndSearch(rootDir, pages) {
9421
10353
  existingProjectIndexPaths.filter((relativePath) => !allowedProjectIndexPaths.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
9422
10354
  );
9423
10355
  const existingGraphPages = (await listFilesRecursive(path18.join(paths.wikiDir, "graph").replace(/\/$/, "")).catch(() => [])).filter((absolutePath) => absolutePath.endsWith(".md")).map((absolutePath) => toPosix(path18.relative(paths.wikiDir, absolutePath)));
9424
- const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...graphOrientationRecords.map((record) => record.page.path)]);
10356
+ const allowedGraphPages = /* @__PURE__ */ new Set(["graph/index.md", ...graphOrientation.records.map((record) => record.page.path)]);
9425
10357
  await Promise.all(
9426
10358
  existingGraphPages.filter((relativePath) => !allowedGraphPages.has(relativePath)).map((relativePath) => fs15.rm(path18.join(paths.wikiDir, relativePath), { force: true }))
9427
10359
  );
@@ -9438,7 +10370,7 @@ async function prepareOutputPageSave(rootDir, input) {
9438
10370
  status: "active",
9439
10371
  createdAt: now,
9440
10372
  updatedAt: now,
9441
- compiledFrom: uniqueStrings2(input.relatedSourceIds ?? input.citations),
10373
+ compiledFrom: uniqueStrings3(input.relatedSourceIds ?? input.citations),
9442
10374
  managedBy: "system",
9443
10375
  confidence: 0.74
9444
10376
  }
@@ -9479,7 +10411,7 @@ async function prepareExploreHubSave(rootDir, input) {
9479
10411
  status: "active",
9480
10412
  createdAt: now,
9481
10413
  updatedAt: now,
9482
- compiledFrom: uniqueStrings2(input.citations),
10414
+ compiledFrom: uniqueStrings3(input.citations),
9483
10415
  managedBy: "system",
9484
10416
  confidence: 0.76
9485
10417
  }
@@ -9542,6 +10474,7 @@ async function stageOutputApprovalBundle(rootDir, stagedPages) {
9542
10474
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
9543
10475
  nodes: previousGraph?.nodes ?? [],
9544
10476
  edges: previousGraph?.edges ?? [],
10477
+ hyperedges: previousGraph?.hyperedges ?? [],
9545
10478
  sources: previousGraph?.sources ?? [],
9546
10479
  pages: nextPages
9547
10480
  };
@@ -9577,7 +10510,7 @@ async function executeQuery(rootDir, question, format) {
9577
10510
  const absolutePath = path18.join(paths.wikiDir, result.path);
9578
10511
  try {
9579
10512
  const content = await fs15.readFile(absolutePath, "utf8");
9580
- const parsed = matter8(content);
10513
+ const parsed = matter9(content);
9581
10514
  return `# ${result.title}
9582
10515
  ${truncate(normalizeWhitespace(parsed.content), 1200)}`;
9583
10516
  } catch {
@@ -9850,6 +10783,7 @@ async function acceptApproval(rootDir, approvalId, targets = []) {
9850
10783
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
9851
10784
  nodes: currentGraph?.nodes ?? bundleGraph?.nodes ?? [],
9852
10785
  edges: currentGraph?.edges ?? bundleGraph?.edges ?? [],
10786
+ hyperedges: currentGraph?.hyperedges ?? bundleGraph?.hyperedges ?? [],
9853
10787
  sources: currentGraph?.sources ?? bundleGraph?.sources ?? [],
9854
10788
  pages: sortGraphPages(nextPages)
9855
10789
  };
@@ -9927,13 +10861,13 @@ async function promoteCandidate(rootDir, target) {
9927
10861
  const graph = await readJsonFile(paths.graphPath);
9928
10862
  const candidate = resolveCandidateTarget(graph?.pages ?? [], target);
9929
10863
  const raw = await fs15.readFile(path18.join(paths.wikiDir, candidate.path), "utf8");
9930
- const parsed = matter8(raw);
10864
+ const parsed = matter9(raw);
9931
10865
  const nextUpdatedAt = (/* @__PURE__ */ new Date()).toISOString();
9932
- const nextContent = matter8.stringify(parsed.content, {
10866
+ const nextContent = matter9.stringify(parsed.content, {
9933
10867
  ...parsed.data,
9934
10868
  status: "active",
9935
10869
  updated_at: nextUpdatedAt,
9936
- tags: uniqueStrings2([candidate.kind, ...Array.isArray(parsed.data.tags) ? parsed.data.tags : []]).filter(
10870
+ tags: uniqueStrings3([candidate.kind, ...Array.isArray(parsed.data.tags) ? parsed.data.tags : []]).filter(
9937
10871
  (tag) => tag !== "candidate"
9938
10872
  )
9939
10873
  });
@@ -9950,6 +10884,7 @@ async function promoteCandidate(rootDir, target) {
9950
10884
  generatedAt: nextUpdatedAt,
9951
10885
  nodes: graph?.nodes ?? [],
9952
10886
  edges: graph?.edges ?? [],
10887
+ hyperedges: graph?.hyperedges ?? [],
9953
10888
  sources: graph?.sources ?? [],
9954
10889
  pages: nextPages
9955
10890
  };
@@ -9991,6 +10926,7 @@ async function archiveCandidate(rootDir, target) {
9991
10926
  generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
9992
10927
  nodes: graph?.nodes ?? [],
9993
10928
  edges: graph?.edges ?? [],
10929
+ hyperedges: graph?.hyperedges ?? [],
9994
10930
  sources: graph?.sources ?? [],
9995
10931
  pages: nextPages
9996
10932
  };
@@ -10075,7 +11011,7 @@ async function initVault(rootDir, options = {}) {
10075
11011
  const now = (/* @__PURE__ */ new Date()).toISOString();
10076
11012
  await writeFileIfChanged(
10077
11013
  insightsIndexPath,
10078
- matter8.stringify(
11014
+ matter9.stringify(
10079
11015
  [
10080
11016
  "# Insights",
10081
11017
  "",
@@ -10108,7 +11044,7 @@ async function initVault(rootDir, options = {}) {
10108
11044
  );
10109
11045
  await writeFileIfChanged(
10110
11046
  path18.join(paths.wikiDir, "projects", "index.md"),
10111
- matter8.stringify(["# Projects", "", "- Run `swarmvault compile` to build project rollups.", ""].join("\n"), {
11047
+ matter9.stringify(["# Projects", "", "- Run `swarmvault compile` to build project rollups.", ""].join("\n"), {
10112
11048
  page_id: "projects:index",
10113
11049
  kind: "index",
10114
11050
  title: "Projects",
@@ -10130,7 +11066,7 @@ async function initVault(rootDir, options = {}) {
10130
11066
  );
10131
11067
  await writeFileIfChanged(
10132
11068
  path18.join(paths.wikiDir, "candidates", "index.md"),
10133
- matter8.stringify(["# Candidates", "", "- Run `swarmvault compile` to stage candidate pages.", ""].join("\n"), {
11069
+ matter9.stringify(["# Candidates", "", "- Run `swarmvault compile` to stage candidate pages.", ""].join("\n"), {
10134
11070
  page_id: "candidates:index",
10135
11071
  kind: "index",
10136
11072
  title: "Candidates",
@@ -10154,6 +11090,20 @@ async function initVault(rootDir, options = {}) {
10154
11090
  await ensureObsidianWorkspace(rootDir);
10155
11091
  }
10156
11092
  }
11093
+ async function runConfiguredBenchmark(rootDir, config) {
11094
+ if (config.benchmark?.enabled === false) {
11095
+ return { ok: true };
11096
+ }
11097
+ try {
11098
+ await benchmarkVault(rootDir);
11099
+ return { ok: true };
11100
+ } catch (error) {
11101
+ return {
11102
+ ok: false,
11103
+ error: error instanceof Error ? error.message : String(error)
11104
+ };
11105
+ }
11106
+ }
10157
11107
  async function compileVault(rootDir, options = {}) {
10158
11108
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
10159
11109
  const { config, paths } = await initWorkspace(rootDir);
@@ -10169,7 +11119,7 @@ async function compileVault(rootDir, options = {}) {
10169
11119
  const currentInsightHashes = pageHashes(storedInsightPages);
10170
11120
  const previousState = await readJsonFile(paths.compileStatePath);
10171
11121
  const rootSchemaChanged = !previousState || previousState.rootSchemaHash !== schemas.root.hash;
10172
- const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash || uniqueStrings2([...Object.keys(previousState?.effectiveSchemaHashes?.projects ?? {}), ...Object.keys(schemas.effective.projects)]).some(
11122
+ const effectiveSchemaChanged = !previousState || previousGlobalSchemaHash(previousState) !== schemas.effective.global.hash || uniqueStrings3([...Object.keys(previousState?.effectiveSchemaHashes?.projects ?? {}), ...Object.keys(schemas.effective.projects)]).some(
10173
11123
  (projectId) => previousProjectSchemaHash(previousState, projectId) !== effectiveHashForProject(schemas, projectId)
10174
11124
  );
10175
11125
  const nextProjectConfigHash = projectConfigHash(config);
@@ -10202,6 +11152,10 @@ async function compileVault(rootDir, options = {}) {
10202
11152
  }
10203
11153
  if (dirty.length === 0 && !rootSchemaChanged && !effectiveSchemaChanged && !projectConfigChanged && !sourcesChanged && !outputsChanged && !insightsChanged && !pendingCandidatePromotion && artifactsExist && !options.approve) {
10204
11154
  const graph = await readJsonFile(paths.graphPath);
11155
+ const benchmark2 = await runConfiguredBenchmark(rootDir, config);
11156
+ if (graph && benchmark2.ok) {
11157
+ await refreshIndexesAndSearch(rootDir, graph.pages);
11158
+ }
10205
11159
  await recordSession(rootDir, {
10206
11160
  operation: "compile",
10207
11161
  title: `Compiled ${manifests.length} source(s)`,
@@ -10219,7 +11173,8 @@ async function compileVault(rootDir, options = {}) {
10219
11173
  `clean=${manifests.length}`,
10220
11174
  `outputs=${outputPages.length}`,
10221
11175
  `insights=${insightPages.length}`,
10222
- `schema=${schemas.effective.global.hash.slice(0, 12)}`
11176
+ `schema=${schemas.effective.global.hash.slice(0, 12)}`,
11177
+ `benchmark=${benchmark2.ok ? "ok" : `error:${benchmark2.error}`}`
10223
11178
  ]
10224
11179
  });
10225
11180
  return {
@@ -10337,6 +11292,10 @@ async function compileVault(rootDir, options = {}) {
10337
11292
  postPassApprovalDir = staged.approvalDir;
10338
11293
  }
10339
11294
  }
11295
+ const benchmark = options.approve ? { ok: true } : await runConfiguredBenchmark(rootDir, config);
11296
+ if (!options.approve && benchmark.ok) {
11297
+ await refreshIndexesAndSearch(rootDir, sync.allPages);
11298
+ }
10340
11299
  await recordSession(rootDir, {
10341
11300
  operation: "compile",
10342
11301
  title: `Compiled ${manifests.length} source(s)`,
@@ -10358,7 +11317,8 @@ async function compileVault(rootDir, options = {}) {
10358
11317
  `promoted=${sync.promotedPageIds.length}`,
10359
11318
  `staged=${sync.staged}`,
10360
11319
  `postPassApproval=${postPassApprovalId ?? "none"}`,
10361
- `schema=${schemas.effective.global.hash.slice(0, 12)}`
11320
+ `schema=${schemas.effective.global.hash.slice(0, 12)}`,
11321
+ `benchmark=${benchmark.ok ? "ok" : `error:${benchmark.error}`}`
10362
11322
  ]
10363
11323
  });
10364
11324
  return {
@@ -10628,7 +11588,7 @@ ${orchestrationNotes.join("\n")}
10628
11588
  citations: allCitations,
10629
11589
  format: outputFormat,
10630
11590
  relatedPageCount: stepPages.length,
10631
- relatedNodeCount: uniqueStrings2(stepPages.flatMap((page) => page.nodeIds)).length,
11591
+ relatedNodeCount: uniqueStrings3(stepPages.flatMap((page) => page.nodeIds)).length,
10632
11592
  projectId: stepPages[0]?.projectIds[0] ?? null
10633
11593
  });
10634
11594
  const hubInput = {
@@ -10638,7 +11598,7 @@ ${orchestrationNotes.join("\n")}
10638
11598
  citations: allCitations,
10639
11599
  schemaHash: composeVaultSchema(
10640
11600
  schemas.root,
10641
- uniqueStrings2(stepPages.flatMap((page) => page.projectIds).sort((left, right) => left.localeCompare(right))).map((projectId) => schemas.projects[projectId]).filter((schema) => Boolean(schema?.hash))
11601
+ uniqueStrings3(stepPages.flatMap((page) => page.projectIds).sort((left, right) => left.localeCompare(right))).map((projectId) => schemas.projects[projectId]).filter((schema) => Boolean(schema?.hash))
10642
11602
  ).hash,
10643
11603
  outputFormat,
10644
11604
  outputAssets: hubAssetBundle.outputAssets,
@@ -10698,7 +11658,7 @@ ${orchestrationNotes.join("\n")}
10698
11658
  providerId: provider.id,
10699
11659
  success: true,
10700
11660
  relatedSourceIds: [...relatedSourceIds],
10701
- relatedPageIds: uniqueStrings2([...relatedPageIds, ...stepPages.map((page) => page.id), hubPage.id]),
11661
+ relatedPageIds: uniqueStrings3([...relatedPageIds, ...stepPages.map((page) => page.id), hubPage.id]),
10702
11662
  relatedNodeIds: [...relatedNodeIds],
10703
11663
  citations: allCitations,
10704
11664
  tokenUsage: tokenUsage.inputTokens > 0 || tokenUsage.outputTokens > 0 ? {
@@ -10753,7 +11713,7 @@ async function queryGraphVault(rootDir, question, options = {}) {
10753
11713
  return queryGraph(graph, question, searchResults, options);
10754
11714
  }
10755
11715
  async function benchmarkVault(rootDir, options = {}) {
10756
- const { paths } = await loadVaultConfig(rootDir);
11716
+ const { config, paths } = await loadVaultConfig(rootDir);
10757
11717
  const graph = await ensureCompiledGraph(rootDir);
10758
11718
  const manifests = await listManifests(rootDir);
10759
11719
  const pageContentsById = /* @__PURE__ */ new Map();
@@ -10769,11 +11729,13 @@ async function benchmarkVault(rootDir, options = {}) {
10769
11729
  if (!await fileExists(absolutePath)) {
10770
11730
  continue;
10771
11731
  }
10772
- const parsed = matter8(await fs15.readFile(absolutePath, "utf8"));
11732
+ const parsed = matter9(await fs15.readFile(absolutePath, "utf8"));
10773
11733
  pageContentsById.set(page.id, parsed.content);
10774
11734
  }
11735
+ const configuredQuestions = (config.benchmark?.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
11736
+ const maxQuestions = Math.max(1, options.maxQuestions ?? config.benchmark?.maxQuestions ?? 3);
10775
11737
  const questions = (options.questions ?? []).map((question) => normalizeWhitespace(question)).filter(Boolean);
10776
- const sampleQuestions = questions.length ? questions : [...DEFAULT_BENCHMARK_QUESTIONS];
11738
+ const sampleQuestions = (questions.length ? questions : configuredQuestions.length ? configuredQuestions : defaultBenchmarkQuestionsForGraph(graph, maxQuestions)).slice(0, maxQuestions);
10777
11739
  const perQuestion = sampleQuestions.map((question) => {
10778
11740
  const searchResults = searchPages(paths.searchDbPath, question, { limit: 12 });
10779
11741
  const result = queryGraph(graph, question, searchResults, { budget: 12 });
@@ -10783,6 +11745,7 @@ async function benchmarkVault(rootDir, options = {}) {
10783
11745
  queryTokens: metrics.queryTokens,
10784
11746
  reduction: metrics.reduction,
10785
11747
  visitedNodeIds: result.visitedNodeIds,
11748
+ visitedEdgeIds: result.visitedEdgeIds,
10786
11749
  pageIds: result.pageIds
10787
11750
  };
10788
11751
  });
@@ -10804,6 +11767,14 @@ async function explainGraphVault(rootDir, target) {
10804
11767
  const graph = await ensureCompiledGraph(rootDir);
10805
11768
  return explainGraphTarget(graph, target);
10806
11769
  }
11770
+ async function listGraphHyperedges(rootDir, target, limit = 25) {
11771
+ const graph = await ensureCompiledGraph(rootDir);
11772
+ return listHyperedges(graph, target, limit);
11773
+ }
11774
+ async function readGraphReport(rootDir) {
11775
+ const { paths } = await loadVaultConfig(rootDir);
11776
+ return readJsonFile(path18.join(paths.wikiDir, "graph", "report.json"));
11777
+ }
10807
11778
  async function listGodNodes(rootDir, limit = 10) {
10808
11779
  const graph = await ensureCompiledGraph(rootDir);
10809
11780
  return topGodNodes(graph, limit);
@@ -10820,7 +11791,7 @@ async function readPage(rootDir, relativePath) {
10820
11791
  return null;
10821
11792
  }
10822
11793
  const raw = await fs15.readFile(absolutePath, "utf8");
10823
- const parsed = matter8(raw);
11794
+ const parsed = matter9(raw);
10824
11795
  return {
10825
11796
  path: relativePath,
10826
11797
  title: typeof parsed.data.title === "string" ? parsed.data.title : path18.basename(relativePath, path18.extname(relativePath)),
@@ -10947,7 +11918,7 @@ async function lintVault(rootDir, options = {}) {
10947
11918
  providerId: provider?.id,
10948
11919
  success: true,
10949
11920
  relatedPageIds: graph.pages.map((page) => page.id),
10950
- relatedSourceIds: uniqueStrings2(graph.pages.flatMap((page) => page.sourceIds)),
11921
+ relatedSourceIds: uniqueStrings3(graph.pages.flatMap((page) => page.sourceIds)),
10951
11922
  lintFindingCount: findings.length,
10952
11923
  lines: [`findings=${findings.length}`, `deep=${Boolean(options.deep)}`, `web=${Boolean(options.web)}`]
10953
11924
  });
@@ -10967,7 +11938,7 @@ async function bootstrapDemo(rootDir, input) {
10967
11938
  }
10968
11939
 
10969
11940
  // src/mcp.ts
10970
- var SERVER_VERSION = "0.1.22";
11941
+ var SERVER_VERSION = "0.1.24";
10971
11942
  async function createMcpServer(rootDir) {
10972
11943
  const server = new McpServer({
10973
11944
  name: "swarmvault",
@@ -11045,10 +12016,19 @@ async function createMcpServer(rootDir) {
11045
12016
  return asToolText(result);
11046
12017
  }
11047
12018
  );
12019
+ server.registerTool(
12020
+ "graph_report",
12021
+ {
12022
+ description: "Return the machine-readable graph report and trust artifact."
12023
+ },
12024
+ async () => {
12025
+ return asToolText(await readGraphReport(rootDir) ?? { error: "Graph report not found. Run `swarmvault compile` first." });
12026
+ }
12027
+ );
11048
12028
  server.registerTool(
11049
12029
  "get_node",
11050
12030
  {
11051
- description: "Explain a graph node, its page, community, and neighbors.",
12031
+ description: "Explain a graph node, its page, community, neighbors, and group patterns.",
11052
12032
  inputSchema: {
11053
12033
  target: z8.string().min(1).describe("Node or page label/id")
11054
12034
  }
@@ -11057,6 +12037,19 @@ async function createMcpServer(rootDir) {
11057
12037
  return asToolText(await explainGraphVault(rootDir, target));
11058
12038
  }
11059
12039
  );
12040
+ server.registerTool(
12041
+ "get_hyperedges",
12042
+ {
12043
+ description: "List graph hyperedges, optionally filtered to a node or page target.",
12044
+ inputSchema: {
12045
+ target: z8.string().optional().describe("Optional node/page label or id to filter by"),
12046
+ limit: z8.number().int().min(1).max(50).optional().describe("Maximum hyperedges to return")
12047
+ }
12048
+ },
12049
+ async ({ target, limit }) => {
12050
+ return asToolText(await listGraphHyperedges(rootDir, target, limit ?? 25));
12051
+ }
12052
+ );
11060
12053
  server.registerTool(
11061
12054
  "get_neighbors",
11062
12055
  {
@@ -11598,7 +12591,7 @@ import fs18 from "fs/promises";
11598
12591
  import http from "http";
11599
12592
  import path22 from "path";
11600
12593
  import { promisify } from "util";
11601
- import matter9 from "gray-matter";
12594
+ import matter10 from "gray-matter";
11602
12595
  import mime2 from "mime-types";
11603
12596
 
11604
12597
  // src/watch.ts
@@ -11999,7 +12992,7 @@ async function readViewerPage(rootDir, relativePath) {
11999
12992
  return null;
12000
12993
  }
12001
12994
  const raw = await fs18.readFile(absolutePath, "utf8");
12002
- const parsed = matter9(raw);
12995
+ const parsed = matter10(raw);
12003
12996
  return {
12004
12997
  path: relativePath,
12005
12998
  title: typeof parsed.data.title === "string" ? parsed.data.title : path22.basename(relativePath, path22.extname(relativePath)),
@@ -12102,16 +13095,29 @@ async function startGraphServer(rootDir, port) {
12102
13095
  const kind = url.searchParams.get("kind") ?? "all";
12103
13096
  const status = url.searchParams.get("status") ?? "all";
12104
13097
  const project = url.searchParams.get("project") ?? "all";
13098
+ const sourceType = url.searchParams.get("sourceType") ?? "all";
12105
13099
  const results = searchPages(paths.searchDbPath, query, {
12106
13100
  limit: Number.isFinite(limit) ? limit : 10,
12107
13101
  kind,
12108
13102
  status,
12109
- project
13103
+ project,
13104
+ sourceType
12110
13105
  });
12111
13106
  response.writeHead(200, { "content-type": "application/json" });
12112
13107
  response.end(JSON.stringify(results));
12113
13108
  return;
12114
13109
  }
13110
+ if (url.pathname === "/api/graph-report") {
13111
+ const reportPath = path22.join(paths.wikiDir, "graph", "report.json");
13112
+ if (!await fileExists(reportPath)) {
13113
+ response.writeHead(404, { "content-type": "application/json" });
13114
+ response.end(JSON.stringify({ error: "Graph report artifact not found. Run `swarmvault compile` first." }));
13115
+ return;
13116
+ }
13117
+ response.writeHead(200, { "content-type": "application/json" });
13118
+ response.end(await fs18.readFile(reportPath, "utf8"));
13119
+ return;
13120
+ }
12115
13121
  if (url.pathname === "/api/watch-status") {
12116
13122
  response.writeHead(200, { "content-type": "application/json" });
12117
13123
  response.end(JSON.stringify(await getWatchStatus(rootDir)));
@@ -12241,6 +13247,7 @@ async function exportGraphHtml(rootDir, outputPath) {
12241
13247
  title: loaded.title,
12242
13248
  kind: page.kind,
12243
13249
  status: page.status,
13250
+ sourceType: page.sourceType,
12244
13251
  projectIds: page.projectIds,
12245
13252
  content: loaded.content,
12246
13253
  assets: await Promise.all(
@@ -12262,7 +13269,8 @@ async function exportGraphHtml(rootDir, outputPath) {
12262
13269
  }
12263
13270
  const script = await fs18.readFile(scriptPath, "utf8");
12264
13271
  const style = stylePath && await fileExists(stylePath) ? await fs18.readFile(stylePath, "utf8") : "";
12265
- const embeddedData = JSON.stringify({ graph, pages: pages.filter(Boolean) }, null, 2).replace(/</g, "\\u003c");
13272
+ const report = await readJsonFile(path22.join(paths.wikiDir, "graph", "report.json"));
13273
+ const embeddedData = JSON.stringify({ graph, pages: pages.filter(Boolean), report }, null, 2).replace(/</g, "\\u003c");
12266
13274
  const html = [
12267
13275
  "<!doctype html>",
12268
13276
  '<html lang="en">',
@@ -12318,6 +13326,7 @@ export {
12318
13326
  listApprovals,
12319
13327
  listCandidates,
12320
13328
  listGodNodes,
13329
+ listGraphHyperedges,
12321
13330
  listManifests,
12322
13331
  listPages,
12323
13332
  listSchedules,
@@ -12331,6 +13340,7 @@ export {
12331
13340
  queryVault,
12332
13341
  readApproval,
12333
13342
  readExtractedText,
13343
+ readGraphReport,
12334
13344
  readPage,
12335
13345
  rejectApproval,
12336
13346
  resolvePaths,