@swarmvaultai/engine 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1764,6 +1764,8 @@ var grammarAssetByLanguage = {
1764
1764
  java: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-java.wasm" },
1765
1765
  kotlin: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-kotlin.wasm" },
1766
1766
  scala: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-scala.wasm" },
1767
+ lua: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-lua.wasm" },
1768
+ zig: { packageName: TREE_SITTER_EXTRA_GRAMMARS_PACKAGE, relativePath: "out/tree-sitter-zig.wasm" },
1767
1769
  csharp: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-c-sharp.wasm" },
1768
1770
  c: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-cpp.wasm" },
1769
1771
  cpp: { packageName: TREE_SITTER_RUNTIME_PACKAGE, relativePath: "wasm/tree-sitter-cpp.wasm" },
@@ -1828,7 +1830,7 @@ function normalizeSymbolReference(value) {
1828
1830
  return lastSegment.replace(/[,:;]+$/g, "").trim();
1829
1831
  }
1830
1832
  function stripCodeExtension(filePath) {
1831
- return filePath.replace(/\.(?:[cm]?jsx?|tsx?|mts|cts|py|go|rs|java|kt|kts|scala|sc|cs|php|c|cc|cpp|cxx|h|hh|hpp|hxx)$/i, "");
1833
+ return filePath.replace(/\.(?:[cm]?jsx?|tsx?|mts|cts|py|go|rs|java|kt|kts|scala|sc|lua|zig|cs|php|c|cc|cpp|cxx|h|hh|hpp|hxx)$/i, "");
1832
1834
  }
1833
1835
  function manifestModuleName(manifest, language) {
1834
1836
  const repoPath = manifest.repoRelativePath ?? path5.basename(manifest.originalPath ?? manifest.storedPath);
@@ -2112,7 +2114,7 @@ function descendantTypeNames(node) {
2112
2114
  );
2113
2115
  }
2114
2116
  function quotedPath(value) {
2115
- return value.replace(/^["'<]+|[">]+$/g, "").trim();
2117
+ return value.replace(/^['"<]+|['">]+$/g, "").trim();
2116
2118
  }
2117
2119
  function diagnosticsFromTree(rootNode) {
2118
2120
  if (!rootNode.hasError) {
@@ -2262,6 +2264,43 @@ function parseScalaImport(text) {
2262
2264
  reExport: false
2263
2265
  }));
2264
2266
  }
2267
+ function parseLuaRequire(node) {
2268
+ const stringNode = node.descendantsOfType("string").find((item) => item !== null);
2269
+ const identifiers = node.descendantsOfType("identifier").filter((item) => item !== null).map((item) => item.text.trim());
2270
+ if (!stringNode || !identifiers.includes("require")) {
2271
+ return void 0;
2272
+ }
2273
+ const specifier = quotedPath(stringNode.text);
2274
+ if (!specifier) {
2275
+ return void 0;
2276
+ }
2277
+ return {
2278
+ specifier,
2279
+ importedSymbols: [],
2280
+ isExternal: !/^[A-Za-z_][A-Za-z0-9_]*(?:[./][A-Za-z_][A-Za-z0-9_]*)*$/.test(specifier),
2281
+ reExport: false
2282
+ };
2283
+ }
2284
+ function parseZigImport(node) {
2285
+ if (node.type !== "variable_declaration") {
2286
+ return void 0;
2287
+ }
2288
+ const importCall = findNamedChild(node, "builtin_function");
2289
+ if (!importCall || nodeText(findNamedChild(importCall, "builtin_identifier") ?? importCall.namedChildren.at(0) ?? null) !== "@import") {
2290
+ return void 0;
2291
+ }
2292
+ const stringNode = importCall.descendantsOfType("string_content").find((item) => item !== null);
2293
+ const specifier = stringNode?.text.trim();
2294
+ if (!specifier) {
2295
+ return void 0;
2296
+ }
2297
+ return {
2298
+ specifier,
2299
+ importedSymbols: [],
2300
+ isExternal: !specifier.endsWith(".zig") && !specifier.includes("/") && !specifier.startsWith("."),
2301
+ reExport: false
2302
+ };
2303
+ }
2265
2304
  function parseCSharpUsing(text) {
2266
2305
  const aliasMatch = text.trim().match(/^using\s+([A-Za-z_]\w*)\s*=\s*([^;]+);$/);
2267
2306
  if (aliasMatch) {
@@ -2386,6 +2425,28 @@ function scalaDefinitionKind(node) {
2386
2425
  }
2387
2426
  return void 0;
2388
2427
  }
2428
+ function luaFunctionName(node) {
2429
+ if (!node) {
2430
+ return void 0;
2431
+ }
2432
+ if (node.type === "identifier") {
2433
+ return node.text.trim();
2434
+ }
2435
+ if (node.type === "variable") {
2436
+ const identifiers = node.descendantsOfType("identifier").filter((item) => item !== null).map((item) => item.text.trim()).filter(Boolean);
2437
+ return identifiers.length > 0 ? identifiers.join(".") : void 0;
2438
+ }
2439
+ return extractIdentifier(node);
2440
+ }
2441
+ function zigDeclarationKind(node) {
2442
+ if (findNamedChild(node, "struct_declaration")) {
2443
+ return "struct";
2444
+ }
2445
+ if (findNamedChild(node, "enum_declaration")) {
2446
+ return "enum";
2447
+ }
2448
+ return void 0;
2449
+ }
2389
2450
  function pythonCodeAnalysis(manifest, rootNode, diagnostics) {
2390
2451
  const imports = [];
2391
2452
  const draftSymbols = [];
@@ -2814,6 +2875,136 @@ function scalaCodeAnalysis(manifest, rootNode, diagnostics) {
2814
2875
  namespace: packageName
2815
2876
  });
2816
2877
  }
2878
+ function luaCodeAnalysis(manifest, rootNode, diagnostics) {
2879
+ const imports = [];
2880
+ const draftSymbols = [];
2881
+ const exportLabels = [];
2882
+ for (const child of rootNode.namedChildren) {
2883
+ if (!child) {
2884
+ continue;
2885
+ }
2886
+ if (child.type === "local_variable_declaration" || child.type === "assignment_statement") {
2887
+ const parsed = parseLuaRequire(child);
2888
+ if (parsed) {
2889
+ imports.push(parsed);
2890
+ }
2891
+ continue;
2892
+ }
2893
+ if (!["function_definition_statement", "local_function_definition_statement"].includes(child.type)) {
2894
+ continue;
2895
+ }
2896
+ const name = luaFunctionName(child.childForFieldName("name") ?? child.namedChildren.at(0) ?? null);
2897
+ if (!name) {
2898
+ continue;
2899
+ }
2900
+ draftSymbols.push({
2901
+ name,
2902
+ kind: "function",
2903
+ signature: singleLineSignature(child.text),
2904
+ exported: child.type !== "local_function_definition_statement",
2905
+ callNames: [],
2906
+ extendsNames: [],
2907
+ implementsNames: [],
2908
+ bodyText: nodeText(findNamedChild(child, "block") ?? child.childForFieldName("body")) || child.text
2909
+ });
2910
+ if (child.type !== "local_function_definition_statement") {
2911
+ exportLabels.push(name);
2912
+ }
2913
+ }
2914
+ return finalizeCodeAnalysis(manifest, "lua", imports, draftSymbols, exportLabels, diagnostics);
2915
+ }
2916
+ function zigCodeAnalysis(manifest, rootNode, diagnostics) {
2917
+ const imports = [];
2918
+ const draftSymbols = [];
2919
+ const exportLabels = [];
2920
+ const pushStructMembers = (structNode, scopeName) => {
2921
+ if (!structNode) {
2922
+ return;
2923
+ }
2924
+ for (const child of structNode.namedChildren) {
2925
+ if (!child || child.type !== "function_declaration") {
2926
+ continue;
2927
+ }
2928
+ const functionName = extractIdentifier(child.childForFieldName("name") ?? findNamedChild(child, "identifier"));
2929
+ if (!functionName) {
2930
+ continue;
2931
+ }
2932
+ const exported = /\bpub\b/.test(child.text);
2933
+ const symbolName = `${scopeName}.${functionName}`;
2934
+ draftSymbols.push({
2935
+ name: symbolName,
2936
+ kind: "function",
2937
+ signature: singleLineSignature(child.text),
2938
+ exported,
2939
+ callNames: [],
2940
+ extendsNames: [],
2941
+ implementsNames: [],
2942
+ bodyText: nodeText(findNamedChild(child, "block") ?? child.childForFieldName("body")) || child.text
2943
+ });
2944
+ if (exported) {
2945
+ exportLabels.push(symbolName);
2946
+ }
2947
+ }
2948
+ };
2949
+ for (const child of rootNode.namedChildren) {
2950
+ if (!child) {
2951
+ continue;
2952
+ }
2953
+ if (child.type === "variable_declaration") {
2954
+ const parsedImport = parseZigImport(child);
2955
+ if (parsedImport) {
2956
+ imports.push(parsedImport);
2957
+ continue;
2958
+ }
2959
+ const name = extractIdentifier(child.childForFieldName("name") ?? findNamedChild(child, "identifier"));
2960
+ const kind = zigDeclarationKind(child);
2961
+ if (!name || !kind) {
2962
+ continue;
2963
+ }
2964
+ const declarationNode = findNamedChild(child, "struct_declaration") ?? findNamedChild(child, "enum_declaration");
2965
+ const exported2 = /\bpub\b/.test(child.text);
2966
+ draftSymbols.push({
2967
+ name,
2968
+ kind,
2969
+ signature: singleLineSignature(child.text),
2970
+ exported: exported2,
2971
+ callNames: [],
2972
+ extendsNames: [],
2973
+ implementsNames: [],
2974
+ bodyText: nodeText(declarationNode) || child.text
2975
+ });
2976
+ if (exported2) {
2977
+ exportLabels.push(name);
2978
+ }
2979
+ if (kind === "struct") {
2980
+ pushStructMembers(declarationNode, name);
2981
+ }
2982
+ continue;
2983
+ }
2984
+ if (child.type !== "function_declaration") {
2985
+ continue;
2986
+ }
2987
+ const functionName = extractIdentifier(child.childForFieldName("name") ?? findNamedChild(child, "identifier"));
2988
+ if (!functionName) {
2989
+ continue;
2990
+ }
2991
+ const exported = /\bpub\b/.test(child.text);
2992
+ draftSymbols.push({
2993
+ name: functionName,
2994
+ kind: "function",
2995
+ signature: singleLineSignature(child.text),
2996
+ exported,
2997
+ callNames: [],
2998
+ extendsNames: [],
2999
+ implementsNames: [],
3000
+ bodyText: nodeText(findNamedChild(child, "block") ?? child.childForFieldName("body")) || child.text
3001
+ });
3002
+ if (exported) {
3003
+ exportLabels.push(functionName);
3004
+ }
3005
+ }
3006
+ return finalizeCodeAnalysis(manifest, "zig", imports, draftSymbols, exportLabels, diagnostics);
3007
+ }
2817
3008
  function csharpCodeAnalysis(manifest, rootNode, diagnostics) {
2818
3009
  const imports = [];
2819
3010
  const draftSymbols = [];
@@ -3246,6 +3437,10 @@ async function analyzeTreeSitterCode(manifest, content, language) {
3246
3437
  return { code: kotlinCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
3247
3438
  case "scala":
3248
3439
  return { code: scalaCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
3440
+ case "lua":
3441
+ return { code: luaCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
3442
+ case "zig":
3443
+ return { code: zigCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
3249
3444
  case "csharp":
3250
3445
  return { code: csharpCodeAnalysis(manifest, tree.rootNode, diagnostics), rationales };
3251
3446
  case "php":
@@ -3532,7 +3727,7 @@ function makeRationale2(manifest, index, text, kind, symbolName) {
3532
3727
  };
3533
3728
  }
3534
3729
  function stripCodeExtension2(filePath) {
3535
- return filePath.replace(/\.(?:[cm]?jsx?|tsx?|mts|cts|py|go|rs|java|kt|kts|scala|sc|cs|php|c|cc|cpp|cxx|h|hh|hpp|hxx)$/i, "");
3730
+ return filePath.replace(/\.(?:[cm]?jsx?|tsx?|mts|cts|py|go|rs|java|kt|kts|scala|sc|lua|zig|cs|php|c|cc|cpp|cxx|h|hh|hpp|hxx)$/i, "");
3536
3731
  }
3537
3732
  function manifestModuleName2(manifest, language) {
3538
3733
  const repoPath = manifest.repoRelativePath ?? path6.basename(manifest.originalPath ?? manifest.storedPath);
@@ -3869,6 +4064,12 @@ function inferCodeLanguage(filePath, mimeType = "") {
3869
4064
  if (extension === ".scala" || extension === ".sc") {
3870
4065
  return "scala";
3871
4066
  }
4067
+ if (extension === ".lua") {
4068
+ return "lua";
4069
+ }
4070
+ if (extension === ".zig") {
4071
+ return "zig";
4072
+ }
3872
4073
  if (extension === ".cs") {
3873
4074
  return "csharp";
3874
4075
  }
@@ -3977,6 +4178,10 @@ function candidateExtensionsFor(language) {
3977
4178
  return [".kt", ".kts"];
3978
4179
  case "scala":
3979
4180
  return [".scala", ".sc"];
4181
+ case "lua":
4182
+ return [".lua"];
4183
+ case "zig":
4184
+ return [".zig"];
3980
4185
  case "csharp":
3981
4186
  return [".cs"];
3982
4187
  case "php":
@@ -4049,6 +4254,20 @@ async function buildCodeIndex(rootDir, manifests, analyses) {
4049
4254
  }
4050
4255
  }
4051
4256
  break;
4257
+ case "lua":
4258
+ recordAlias(aliases, basename);
4259
+ if (repoRelativePath) {
4260
+ const repoWithoutExt = stripCodeExtension2(repoRelativePath);
4261
+ recordAlias(aliases, repoWithoutExt.replace(/\//g, "."));
4262
+ if (repoWithoutExt.endsWith("/init")) {
4263
+ recordAlias(aliases, repoWithoutExt.slice(0, -"/init".length));
4264
+ recordAlias(aliases, repoWithoutExt.slice(0, -"/init".length).replace(/\//g, "."));
4265
+ }
4266
+ }
4267
+ break;
4268
+ case "zig":
4269
+ recordAlias(aliases, basename);
4270
+ break;
4052
4271
  case "php":
4053
4272
  if (normalizedNamespace) {
4054
4273
  recordAlias(aliases, `${normalizedNamespace}\\${basename}`);
@@ -4135,6 +4354,16 @@ function resolveRustAliases(manifest, specifier) {
4135
4354
  `crate${currentParts.length > 1 ? `::${currentParts.slice(0, -1).join("::")}` : ""}::${specifier.slice("super::".length)}`.replace(/::+/g, "::").replace(/::$/, "")
4136
4355
  ];
4137
4356
  }
4357
+ function luaSpecifierLooksLocal(specifier) {
4358
+ return /^[A-Za-z_][A-Za-z0-9_]*(?:[./][A-Za-z_][A-Za-z0-9_]*)*$/.test(specifier);
4359
+ }
4360
+ function resolveLuaModuleCandidates(specifier) {
4361
+ const normalized = normalizeAlias(specifier.replace(/\./g, "/"));
4362
+ if (!normalized) {
4363
+ return [];
4364
+ }
4365
+ return uniqueBy([`${normalized}.lua`, path6.posix.join(normalized, "init.lua")], (item) => item);
4366
+ }
4138
4367
  function findImportCandidates(manifest, codeImport, lookup) {
4139
4368
  const language = manifest.language ?? inferCodeLanguage(manifest.originalPath ?? manifest.storedPath, manifest.mimeType);
4140
4369
  const repoRelativePath = manifest.repoRelativePath ? normalizeAlias(manifest.repoRelativePath) : void 0;
@@ -4158,6 +4387,10 @@ function findImportCandidates(manifest, codeImport, lookup) {
4158
4387
  case "scala":
4159
4388
  case "csharp":
4160
4389
  return aliasMatches(lookup, codeImport.specifier);
4390
+ case "lua":
4391
+ return luaSpecifierLooksLocal(codeImport.specifier) ? repoPathMatches(lookup, ...resolveLuaModuleCandidates(codeImport.specifier)) : aliasMatches(lookup, codeImport.specifier, codeImport.specifier.replace(/\./g, "/"));
4392
+ case "zig":
4393
+ return repoRelativePath && (!codeImport.isExternal || codeImport.specifier.endsWith(".zig")) ? repoPathMatches(lookup, ...importResolutionCandidates(repoRelativePath, codeImport.specifier, candidateExtensionsFor(language))) : aliasMatches(lookup, codeImport.specifier);
4161
4394
  case "php":
4162
4395
  case "ruby":
4163
4396
  case "powershell":
@@ -4205,6 +4438,10 @@ function importLooksLocal(manifest, codeImport, candidates) {
4205
4438
  case "kotlin":
4206
4439
  case "scala":
4207
4440
  return !codeImport.isExternal;
4441
+ case "lua":
4442
+ return luaSpecifierLooksLocal(codeImport.specifier);
4443
+ case "zig":
4444
+ return !codeImport.isExternal || codeImport.specifier.endsWith(".zig");
4208
4445
  default:
4209
4446
  return false;
4210
4447
  }
@@ -4244,9 +4481,10 @@ async function analyzeCodeSource(manifest, extractedText, schemaHash) {
4244
4481
  const language = manifest.language ?? inferCodeLanguage(manifest.originalPath ?? manifest.storedPath, manifest.mimeType) ?? "typescript";
4245
4482
  const { code, rationales } = language === "javascript" || language === "jsx" || language === "typescript" || language === "tsx" ? analyzeTypeScriptLikeCode(manifest, extractedText) : await analyzeTreeSitterCode(manifest, extractedText, language);
4246
4483
  return {
4247
- analysisVersion: 6,
4484
+ analysisVersion: 7,
4248
4485
  sourceId: manifest.sourceId,
4249
4486
  sourceHash: manifest.contentHash,
4487
+ semanticHash: manifest.semanticHash,
4250
4488
  extractionHash: manifest.extractionHash,
4251
4489
  schemaHash,
4252
4490
  title: manifest.title,
@@ -4971,6 +5209,17 @@ var HARD_REPO_IGNORES = /* @__PURE__ */ new Set([".git", ".venv"]);
4971
5209
  var PROGRESS_FILE_THRESHOLD = 150;
4972
5210
  var PROGRESS_UPDATE_INTERVAL = 100;
4973
5211
  var RST_HEADING_MARKERS = /* @__PURE__ */ new Set(["=", "-", "~", "^", '"', "#", "*", "+"]);
5212
+ var MARKDOWN_SEMANTIC_FRONTMATTER_KEYS = [
5213
+ "title",
5214
+ "summary",
5215
+ "description",
5216
+ "aliases",
5217
+ "tags",
5218
+ "authors",
5219
+ "published_at",
5220
+ "canonical_url",
5221
+ "source_type"
5222
+ ];
4974
5223
  function uniqueStrings(values) {
4975
5224
  return [...new Set(values.filter(Boolean))];
4976
5225
  }
@@ -5108,6 +5357,65 @@ function extractedTextForPlainSource(filePath, sourceKind, content) {
5108
5357
  }
5109
5358
  return content;
5110
5359
  }
5360
+ function normalizeSemanticMarkdownScalar(value) {
5361
+ if (typeof value !== "string") {
5362
+ return void 0;
5363
+ }
5364
+ const normalized = normalizeWhitespace(value.trim());
5365
+ return normalized || void 0;
5366
+ }
5367
+ function normalizeSemanticMarkdownList(value) {
5368
+ if (!Array.isArray(value)) {
5369
+ return void 0;
5370
+ }
5371
+ const items = uniqueStrings(
5372
+ value.flatMap((item) => typeof item === "string" ? [normalizeWhitespace(item.trim())] : []).filter(Boolean)
5373
+ );
5374
+ return items.length ? items : void 0;
5375
+ }
5376
+ function semanticMarkdownTitle(fallback, content, filePath) {
5377
+ const parsed = matter3(content);
5378
+ const frontmatterTitle = normalizeSemanticMarkdownScalar(parsed.data.title);
5379
+ if (frontmatterTitle) {
5380
+ return frontmatterTitle;
5381
+ }
5382
+ return titleFromText(fallback, parsed.content, filePath);
5383
+ }
5384
+ function semanticMarkdownContent(content) {
5385
+ const parsed = matter3(content);
5386
+ const body = parsed.content.replace(/\r\n?/g, "\n").trim();
5387
+ const semanticFrontmatter = Object.fromEntries(
5388
+ MARKDOWN_SEMANTIC_FRONTMATTER_KEYS.flatMap((key) => {
5389
+ const value = key === "aliases" || key === "tags" || key === "authors" ? normalizeSemanticMarkdownList(parsed.data[key]) : normalizeSemanticMarkdownScalar(parsed.data[key]);
5390
+ return value === void 0 ? [] : [[key, value]];
5391
+ })
5392
+ );
5393
+ const semanticLines = Object.entries(semanticFrontmatter).map(
5394
+ ([key, value]) => `${key}: ${Array.isArray(value) ? value.join(", ") : value}`
5395
+ );
5396
+ const extractedText = [...semanticLines, ...semanticLines.length && body ? [""] : [], body].filter(Boolean).join("\n").trim();
5397
+ return {
5398
+ extractedText,
5399
+ semanticHash: sha256(
5400
+ JSON.stringify({
5401
+ body,
5402
+ frontmatter: semanticFrontmatter
5403
+ })
5404
+ )
5405
+ };
5406
+ }
5407
+ function finalizePreparedInput(prepared) {
5408
+ if (prepared.sourceKind !== "markdown") {
5409
+ return prepared;
5410
+ }
5411
+ const semantic = semanticMarkdownContent(prepared.payloadBytes.toString("utf8"));
5412
+ return {
5413
+ ...prepared,
5414
+ extractedText: semantic.extractedText,
5415
+ extractionHash: buildExtractionHash(semantic.extractedText, prepared.extractionArtifact),
5416
+ semanticHash: semantic.semanticHash
5417
+ };
5418
+ }
5111
5419
  function shouldEmitProgress(totalItems) {
5112
5420
  return totalItems >= PROGRESS_FILE_THRESHOLD && Boolean(process.stderr?.isTTY);
5113
5421
  }
@@ -5274,7 +5582,7 @@ function markdownFrontmatter(value) {
5274
5582
  return matter3.stringify("", normalized).trimEnd().split("\n").concat([""]);
5275
5583
  }
5276
5584
  function prepareCapturedMarkdownInput(input) {
5277
- return {
5585
+ return finalizePreparedInput({
5278
5586
  title: input.title,
5279
5587
  originType: "url",
5280
5588
  sourceKind: "markdown",
@@ -5286,7 +5594,7 @@ function prepareCapturedMarkdownInput(input) {
5286
5594
  extractedText: input.markdown,
5287
5595
  attachments: input.attachments,
5288
5596
  logDetails: input.logDetails
5289
- };
5597
+ });
5290
5598
  }
5291
5599
  function isPrivateIp(ip) {
5292
5600
  if (ip === "::1" || ip.startsWith("fc") || ip.startsWith("fd")) return true;
@@ -5649,7 +5957,10 @@ async function readManifestByHash(manifestsDir, contentHash) {
5649
5957
  }
5650
5958
  const manifest = await readJsonFile(path12.join(manifestsDir, entry.name));
5651
5959
  if (manifest?.contentHash === contentHash) {
5652
- return manifest;
5960
+ return {
5961
+ ...manifest,
5962
+ semanticHash: manifest.semanticHash ?? manifest.contentHash
5963
+ };
5653
5964
  }
5654
5965
  }
5655
5966
  return null;
@@ -5662,7 +5973,10 @@ async function readManifestByOrigin(manifestsDir, prepared) {
5662
5973
  }
5663
5974
  const manifest = await readJsonFile(path12.join(manifestsDir, entry.name));
5664
5975
  if (manifest && manifestMatchesOrigin(manifest, prepared)) {
5665
- return manifest;
5976
+ return {
5977
+ ...manifest,
5978
+ semanticHash: manifest.semanticHash ?? manifest.contentHash
5979
+ };
5666
5980
  }
5667
5981
  }
5668
5982
  return null;
@@ -5911,10 +6225,11 @@ async function persistPreparedInput(rootDir, prepared, paths) {
5911
6225
  await ensureDir(paths.extractsDir);
5912
6226
  const attachments = prepared.attachments ?? [];
5913
6227
  const contentHash = prepared.contentHash ?? buildCompositeHash(prepared.payloadBytes, attachments);
6228
+ const semanticHash = prepared.semanticHash ?? contentHash;
5914
6229
  const extractionHash = prepared.extractionHash ?? buildExtractionHash(prepared.extractedText, prepared.extractionArtifact);
5915
6230
  const existingByOrigin = await readManifestByOrigin(paths.manifestsDir, prepared);
5916
6231
  const existingByHash = existingByOrigin ? null : await readManifestByHash(paths.manifestsDir, contentHash);
5917
- if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.sourceType === prepared.sourceType && existingByOrigin.sourceClass === prepared.sourceClass && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
6232
+ if (existingByOrigin && existingByOrigin.contentHash === contentHash && existingByOrigin.semanticHash === semanticHash && existingByOrigin.extractionHash === extractionHash && existingByOrigin.title === prepared.title && existingByOrigin.sourceKind === prepared.sourceKind && existingByOrigin.sourceType === prepared.sourceType && existingByOrigin.sourceClass === prepared.sourceClass && existingByOrigin.language === prepared.language && existingByOrigin.mimeType === prepared.mimeType && existingByOrigin.repoRelativePath === prepared.repoRelativePath) {
5918
6233
  return { manifest: existingByOrigin, isNew: false, wasUpdated: false };
5919
6234
  }
5920
6235
  if (existingByHash) {
@@ -5972,6 +6287,7 @@ async function persistPreparedInput(rootDir, prepared, paths) {
5972
6287
  extractionHash,
5973
6288
  mimeType: prepared.mimeType,
5974
6289
  contentHash,
6290
+ semanticHash,
5975
6291
  createdAt: previous?.createdAt ?? now,
5976
6292
  updatedAt: now,
5977
6293
  attachments: manifestAttachments.length ? manifestAttachments : void 0
@@ -6286,14 +6602,15 @@ async function prepareFileInput(rootDir, absoluteInput, repoRoot, sourceClass) {
6286
6602
  let extractedText;
6287
6603
  let extractionArtifact;
6288
6604
  if (sourceKind === "markdown" || sourceKind === "text" || sourceKind === "code") {
6289
- extractedText = extractedTextForPlainSource(absoluteInput, sourceKind, payloadBytes.toString("utf8"));
6290
- title = titleFromText(path12.basename(absoluteInput, path12.extname(absoluteInput)), extractedText, absoluteInput);
6605
+ const rawText = payloadBytes.toString("utf8");
6606
+ extractedText = sourceKind === "markdown" ? semanticMarkdownContent(rawText).extractedText : extractedTextForPlainSource(absoluteInput, sourceKind, rawText);
6607
+ title = sourceKind === "markdown" ? semanticMarkdownTitle(path12.basename(absoluteInput, path12.extname(absoluteInput)), rawText, absoluteInput) : titleFromText(path12.basename(absoluteInput, path12.extname(absoluteInput)), extractedText, absoluteInput);
6291
6608
  extractionArtifact = createPlainTextExtractionArtifact(sourceKind, mimeType);
6292
6609
  } else if (sourceKind === "html") {
6293
6610
  const html = payloadBytes.toString("utf8");
6294
6611
  const converted = await convertHtmlToMarkdown(html, pathToFileURL(absoluteInput).toString());
6295
6612
  title = converted.title;
6296
- extractedText = converted.markdown;
6613
+ extractedText = semanticMarkdownContent(converted.markdown).extractedText;
6297
6614
  extractionArtifact = createHtmlReadabilityExtractionArtifact(sourceKind, mimeType);
6298
6615
  } else if (sourceKind === "pdf") {
6299
6616
  title = path12.basename(absoluteInput, path12.extname(absoluteInput));
@@ -6319,7 +6636,7 @@ async function prepareFileInput(rootDir, absoluteInput, repoRoot, sourceClass) {
6319
6636
  } else {
6320
6637
  title = path12.basename(absoluteInput, path12.extname(absoluteInput));
6321
6638
  }
6322
- return {
6639
+ return finalizePreparedInput({
6323
6640
  title,
6324
6641
  originType: "file",
6325
6642
  sourceKind,
@@ -6333,7 +6650,7 @@ async function prepareFileInput(rootDir, absoluteInput, repoRoot, sourceClass) {
6333
6650
  extractedText,
6334
6651
  extractionArtifact,
6335
6652
  extractionHash: buildExtractionHash(extractedText, extractionArtifact)
6336
- };
6653
+ });
6337
6654
  }
6338
6655
  async function prepareUrlInput(rootDir, input, options) {
6339
6656
  await validateUrlSafety(input);
@@ -6397,8 +6714,9 @@ async function prepareUrlInput(rootDir, input, options) {
6397
6714
  const extension = path12.extname(inputUrl.pathname);
6398
6715
  storedExtension = extension || `.${mime.extension(mimeType) || "bin"}`;
6399
6716
  if (sourceKind === "markdown" || sourceKind === "text" || sourceKind === "code") {
6400
- extractedText = extractedTextForPlainSource(inputUrl.pathname, sourceKind, payloadBytes.toString("utf8"));
6401
- title = titleFromText(title || inputUrl.hostname, extractedText, inputUrl.pathname);
6717
+ const rawText = payloadBytes.toString("utf8");
6718
+ extractedText = sourceKind === "markdown" ? semanticMarkdownContent(rawText).extractedText : extractedTextForPlainSource(inputUrl.pathname, sourceKind, rawText);
6719
+ title = sourceKind === "markdown" ? semanticMarkdownTitle(title || inputUrl.hostname, rawText, inputUrl.pathname) : titleFromText(title || inputUrl.hostname, extractedText, inputUrl.pathname);
6402
6720
  extractionArtifact = createPlainTextExtractionArtifact(sourceKind, mimeType);
6403
6721
  if (sourceKind === "markdown" && options.includeAssets) {
6404
6722
  const { attachments: remoteAttachments, skippedCount } = await collectRemoteImageAttachments(
@@ -6440,7 +6758,7 @@ async function prepareUrlInput(rootDir, input, options) {
6440
6758
  extractionArtifact = extracted.artifact;
6441
6759
  }
6442
6760
  }
6443
- return {
6761
+ return finalizePreparedInput({
6444
6762
  title,
6445
6763
  originType: "url",
6446
6764
  sourceKind,
@@ -6455,7 +6773,7 @@ async function prepareUrlInput(rootDir, input, options) {
6455
6773
  attachments,
6456
6774
  contentHash,
6457
6775
  logDetails
6458
- };
6776
+ });
6459
6777
  }
6460
6778
  async function collectInboxAttachmentRefs(inputDir, files) {
6461
6779
  const refsBySource = /* @__PURE__ */ new Map();
@@ -6529,7 +6847,7 @@ async function prepareInboxMarkdownInput(absolutePath, attachmentRefs) {
6529
6847
  );
6530
6848
  const rewrittenText = rewriteMarkdownReferences(originalText, replacements);
6531
6849
  const extractionArtifact = createPlainTextExtractionArtifact("markdown", "text/markdown");
6532
- return {
6850
+ return finalizePreparedInput({
6533
6851
  title,
6534
6852
  originType: "file",
6535
6853
  sourceKind: "markdown",
@@ -6542,7 +6860,7 @@ async function prepareInboxMarkdownInput(absolutePath, attachmentRefs) {
6542
6860
  extractionHash: buildExtractionHash(rewrittenText, extractionArtifact),
6543
6861
  attachments,
6544
6862
  contentHash
6545
- };
6863
+ });
6546
6864
  }
6547
6865
  async function prepareInboxHtmlInput(absolutePath, attachmentRefs) {
6548
6866
  const originalBytes = await fs11.readFile(absolutePath);
@@ -6784,7 +7102,10 @@ async function listManifests(rootDir) {
6784
7102
  const manifests = await Promise.all(
6785
7103
  entries.filter((entry) => entry.endsWith(".json")).map((entry) => readJsonFile(path12.join(paths.manifestsDir, entry)))
6786
7104
  );
6787
- return manifests.filter((manifest) => Boolean(manifest));
7105
+ return manifests.filter((manifest) => Boolean(manifest)).map((manifest) => ({
7106
+ ...manifest,
7107
+ semanticHash: manifest.semanticHash ?? manifest.contentHash
7108
+ }));
6788
7109
  }
6789
7110
  async function removeManifestBySourceId(rootDir, sourceId) {
6790
7111
  const { paths } = await initWorkspace(rootDir);
@@ -6792,8 +7113,12 @@ async function removeManifestBySourceId(rootDir, sourceId) {
6792
7113
  if (!manifest) {
6793
7114
  return null;
6794
7115
  }
6795
- await removeManifestArtifacts(rootDir, manifest, paths);
6796
- return manifest;
7116
+ const normalizedManifest = {
7117
+ ...manifest,
7118
+ semanticHash: manifest.semanticHash ?? manifest.contentHash
7119
+ };
7120
+ await removeManifestArtifacts(rootDir, normalizedManifest, paths);
7121
+ return normalizedManifest;
6797
7122
  }
6798
7123
  async function readExtractedText(rootDir, manifest) {
6799
7124
  if (!manifest.extractedTextPath) {
@@ -6939,7 +7264,7 @@ import { z as z7 } from "zod";
6939
7264
  // src/analysis.ts
6940
7265
  import path14 from "path";
6941
7266
  import { z as z2 } from "zod";
6942
- var ANALYSIS_FORMAT_VERSION = 6;
7267
+ var ANALYSIS_FORMAT_VERSION = 7;
6943
7268
  var sourceAnalysisSchema = z2.object({
6944
7269
  title: z2.string().min(1),
6945
7270
  summary: z2.string().min(1),
@@ -7044,6 +7369,7 @@ function heuristicAnalysis(manifest, text, schemaHash) {
7044
7369
  analysisVersion: ANALYSIS_FORMAT_VERSION,
7045
7370
  sourceId: manifest.sourceId,
7046
7371
  sourceHash: manifest.contentHash,
7372
+ semanticHash: manifest.semanticHash,
7047
7373
  extractionHash: manifest.extractionHash,
7048
7374
  schemaHash,
7049
7375
  title: deriveTitle(manifest, text),
@@ -7094,6 +7420,7 @@ ${truncate(text, 18e3)}`
7094
7420
  analysisVersion: ANALYSIS_FORMAT_VERSION,
7095
7421
  sourceId: manifest.sourceId,
7096
7422
  sourceHash: manifest.contentHash,
7423
+ semanticHash: manifest.semanticHash,
7097
7424
  extractionHash: manifest.extractionHash,
7098
7425
  schemaHash: schema.hash,
7099
7426
  title: parsed.title,
@@ -7130,6 +7457,7 @@ function analysisFromVisionExtraction(manifest, extraction, schemaHash) {
7130
7457
  analysisVersion: ANALYSIS_FORMAT_VERSION,
7131
7458
  sourceId: manifest.sourceId,
7132
7459
  sourceHash: manifest.contentHash,
7460
+ semanticHash: manifest.semanticHash,
7133
7461
  extractionHash: manifest.extractionHash,
7134
7462
  schemaHash,
7135
7463
  title: extraction.vision.title?.trim() || manifest.title,
@@ -7168,7 +7496,7 @@ function extractionWarningSummary(manifest, extraction) {
7168
7496
  async function analyzeSource(manifest, extractedText, provider, paths, schema) {
7169
7497
  const cachePath = path14.join(paths.analysesDir, `${manifest.sourceId}.json`);
7170
7498
  const cached = await readJsonFile(cachePath);
7171
- if (cached && cached.analysisVersion === ANALYSIS_FORMAT_VERSION && cached.sourceHash === manifest.contentHash && cached.extractionHash === manifest.extractionHash && cached.schemaHash === schema.hash) {
7499
+ if (cached && cached.analysisVersion === ANALYSIS_FORMAT_VERSION && (cached.semanticHash ?? cached.sourceHash) === manifest.semanticHash && cached.extractionHash === manifest.extractionHash && cached.schemaHash === schema.hash) {
7172
7500
  return cached;
7173
7501
  }
7174
7502
  const extraction = await readExtractionArtifact(paths.rootDir, manifest);
@@ -7185,6 +7513,7 @@ async function analyzeSource(manifest, extractedText, provider, paths, schema) {
7185
7513
  analysisVersion: ANALYSIS_FORMAT_VERSION,
7186
7514
  sourceId: manifest.sourceId,
7187
7515
  sourceHash: manifest.contentHash,
7516
+ semanticHash: manifest.semanticHash,
7188
7517
  extractionHash: manifest.extractionHash,
7189
7518
  schemaHash: schema.hash,
7190
7519
  title: manifest.title,
@@ -7211,6 +7540,7 @@ async function analyzeSource(manifest, extractedText, provider, paths, schema) {
7211
7540
  analysisVersion: ANALYSIS_FORMAT_VERSION,
7212
7541
  sourceId: manifest.sourceId,
7213
7542
  sourceHash: manifest.contentHash,
7543
+ semanticHash: manifest.semanticHash,
7214
7544
  extractionHash: manifest.extractionHash,
7215
7545
  schemaHash: schema.hash,
7216
7546
  title: manifest.title,
@@ -7994,7 +8324,9 @@ async function resolveEmbeddingProvider(rootDir) {
7994
8324
  }
7995
8325
  const provider2 = await createProvider(explicitProviderId, providerConfig, rootDir);
7996
8326
  if (!provider2.capabilities.has("embeddings") || typeof provider2.embedTexts !== "function") {
7997
- throw new Error(`Provider ${provider2.id} does not support required capability "embeddings".`);
8327
+ throw new Error(
8328
+ `Provider ${provider2.id} does not support required capability "embeddings". Configure tasks.embeddingProvider to use an embedding-capable backend such as ollama or another openai-compatible embedding service.`
8329
+ );
7998
8330
  }
7999
8331
  return provider2;
8000
8332
  }
@@ -8890,6 +9222,18 @@ function uniqueStrings2(values) {
8890
9222
  function safeFrontmatter(value) {
8891
9223
  return JSON.parse(JSON.stringify(value));
8892
9224
  }
9225
/**
 * Build the two single-entry hash maps for one manifest.
 *
 * @param {object} manifest - Object exposing `sourceId`, `contentHash` and `semanticHash`.
 * @returns {{sourceHashes: object, sourceSemanticHashes: object}} Maps keyed by
 *   `manifest.sourceId`: `sourceHashes` holds the content hash and
 *   `sourceSemanticHashes` holds the semantic hash.
 */
function sourceHashesForManifest(manifest) {
  const { sourceId, contentHash, semanticHash } = manifest;
  const sourceHashes = { [sourceId]: contentHash };
  const sourceSemanticHashes = { [sourceId]: semanticHash };
  return { sourceHashes, sourceSemanticHashes };
}
9231
/**
 * Wrap the two hash maps under their snake_case frontmatter keys.
 *
 * @param {object} sourceHashes - Stored under `source_hashes`.
 * @param {object} sourceSemanticHashes - Stored under `source_semantic_hashes`.
 * @returns {{source_hashes: object, source_semantic_hashes: object}} Fragment
 *   intended to be spread into a frontmatter object.
 */
function sourceHashFrontmatter(sourceHashes, sourceSemanticHashes) {
  const fragment = {};
  fragment.source_hashes = sourceHashes;
  fragment.source_semantic_hashes = sourceSemanticHashes;
  return fragment;
}
8893
9237
  function decoratedTags(baseTags, decorations) {
8894
9238
  return uniqueStrings2([
8895
9239
  ...baseTags,
@@ -8953,6 +9297,7 @@ function relatedOutputsSection(relatedOutputs) {
8953
9297
  function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutputs = [], modulePage, decorations) {
8954
9298
  const relativePath = pagePathFor("source", manifest.sourceId);
8955
9299
  const pageId = `source:${manifest.sourceId}`;
9300
+ const { sourceHashes, sourceSemanticHashes } = sourceHashesForManifest(manifest);
8956
9301
  const moduleNodeIds = analysis.code ? [analysis.code.moduleId, ...analysis.code.symbols.map((symbol) => symbol.id)] : [];
8957
9302
  const nodeIds = [
8958
9303
  `source:${manifest.sourceId}`,
@@ -8985,9 +9330,7 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
8985
9330
  managed_by: metadata.managedBy,
8986
9331
  backlinks,
8987
9332
  schema_hash: schemaHash,
8988
- source_hashes: {
8989
- [manifest.sourceId]: manifest.contentHash
8990
- }
9333
+ ...sourceHashFrontmatter(sourceHashes, sourceSemanticHashes)
8991
9334
  };
8992
9335
  const body = [
8993
9336
  `# ${analysis.title}`,
@@ -9050,7 +9393,8 @@ function buildSourcePage(manifest, analysis, schemaHash, metadata, relatedOutput
9050
9393
  confidence: metadata.confidence,
9051
9394
  backlinks,
9052
9395
  schemaHash,
9053
- sourceHashes: { [manifest.sourceId]: manifest.contentHash },
9396
+ sourceHashes,
9397
+ sourceSemanticHashes,
9054
9398
  relatedPageIds: [...modulePage ? [modulePage.id] : [], ...relatedOutputs.map((page) => page.id)],
9055
9399
  relatedNodeIds: moduleNodeIds,
9056
9400
  relatedSourceIds: [],
@@ -9075,6 +9419,7 @@ function buildModulePage(input) {
9075
9419
  const localModuleBacklinks = input.localModules.map((moduleRef) => moduleRef.page.id);
9076
9420
  const relatedOutputs = input.relatedOutputs ?? [];
9077
9421
  const backlinks = uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]);
9422
+ const { sourceHashes, sourceSemanticHashes } = sourceHashesForManifest(manifest);
9078
9423
  const importsSection = code.imports.length ? code.imports.map((item) => {
9079
9424
  const localModule = item.resolvedSourceId ? input.localModules.find((moduleRef) => moduleRef.sourceId === item.resolvedSourceId && moduleRef.reExport === item.reExport) : void 0;
9080
9425
  const importedBits = [
@@ -9118,9 +9463,7 @@ function buildModulePage(input) {
9118
9463
  managed_by: metadata.managedBy,
9119
9464
  backlinks,
9120
9465
  schema_hash: schemaHash,
9121
- source_hashes: {
9122
- [manifest.sourceId]: manifest.contentHash
9123
- },
9466
+ ...sourceHashFrontmatter(sourceHashes, sourceSemanticHashes),
9124
9467
  related_page_ids: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
9125
9468
  related_node_ids: [],
9126
9469
  related_source_ids: uniqueStrings2([
@@ -9196,7 +9539,8 @@ function buildModulePage(input) {
9196
9539
  confidence: metadata.confidence,
9197
9540
  backlinks,
9198
9541
  schemaHash,
9199
- sourceHashes: { [manifest.sourceId]: manifest.contentHash },
9542
+ sourceHashes,
9543
+ sourceSemanticHashes,
9200
9544
  relatedPageIds: uniqueStrings2([sourcePage.id, ...localModuleBacklinks, ...relatedOutputs.map((page) => page.id)]),
9201
9545
  relatedNodeIds: [],
9202
9546
  relatedSourceIds: uniqueStrings2([
@@ -9212,7 +9556,7 @@ function buildModulePage(input) {
9212
9556
  content: matter5.stringify(body, frontmatter)
9213
9557
  };
9214
9558
  }
9215
- function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes, schemaHash, metadata, relativePath, relatedOutputs = [], decorations) {
9559
+ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHashes, sourceSemanticHashes, schemaHash, metadata, relativePath, relatedOutputs = [], decorations) {
9216
9560
  const slug = slugify(name);
9217
9561
  const pageId = `${kind}:${slug}`;
9218
9562
  const sourceIds = sourceAnalyses.map((item) => item.sourceId);
@@ -9236,7 +9580,7 @@ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHash
9236
9580
  managed_by: metadata.managedBy,
9237
9581
  backlinks: otherPages,
9238
9582
  schema_hash: schemaHash,
9239
- source_hashes: sourceHashes
9583
+ ...sourceHashFrontmatter(sourceHashes, sourceSemanticHashes)
9240
9584
  };
9241
9585
  const body = [
9242
9586
  `# ${name}`,
@@ -9274,6 +9618,7 @@ function buildAggregatePage(kind, name, descriptions, sourceAnalyses, sourceHash
9274
9618
  backlinks: otherPages,
9275
9619
  schemaHash,
9276
9620
  sourceHashes,
9621
+ sourceSemanticHashes,
9277
9622
  relatedPageIds: relatedOutputs.map((page) => page.id),
9278
9623
  relatedNodeIds: [],
9279
9624
  relatedSourceIds: [],
@@ -9314,6 +9659,7 @@ function buildIndexPage(pages, schemaHash, metadata, projectPages = []) {
9314
9659
  "backlinks: []",
9315
9660
  `schema_hash: ${schemaHash}`,
9316
9661
  "source_hashes: {}",
9662
+ "source_semantic_hashes: {}",
9317
9663
  "---",
9318
9664
  "",
9319
9665
  "# SwarmVault Index",
@@ -9377,7 +9723,8 @@ function buildSectionIndex(kind, pages, schemaHash, metadata, projectIds = []) {
9377
9723
  managed_by: metadata.managedBy,
9378
9724
  backlinks: [],
9379
9725
  schema_hash: schemaHash,
9380
- source_hashes: {}
9726
+ source_hashes: {},
9727
+ source_semantic_hashes: {}
9381
9728
  }
9382
9729
  );
9383
9730
  }
@@ -9673,6 +10020,7 @@ function buildGraphReportPage(input) {
9673
10020
  backlinks: [],
9674
10021
  schema_hash: input.schemaHash,
9675
10022
  source_hashes: {},
10023
+ source_semantic_hashes: {},
9676
10024
  related_page_ids: relatedPageIds,
9677
10025
  related_node_ids: relatedNodeIds,
9678
10026
  related_source_ids: relatedSourceIds
@@ -9788,6 +10136,7 @@ function buildGraphReportPage(input) {
9788
10136
  backlinks: [],
9789
10137
  schemaHash: input.schemaHash,
9790
10138
  sourceHashes: {},
10139
+ sourceSemanticHashes: {},
9791
10140
  relatedPageIds,
9792
10141
  relatedNodeIds,
9793
10142
  relatedSourceIds,
@@ -9831,6 +10180,7 @@ function buildCommunitySummaryPage(input) {
9831
10180
  backlinks: ["graph:report"],
9832
10181
  schema_hash: input.schemaHash,
9833
10182
  source_hashes: {},
10183
+ source_semantic_hashes: {},
9834
10184
  related_page_ids: uniqueStrings2(["graph:report", ...communityPageIds]),
9835
10185
  related_node_ids: input.community.nodeIds,
9836
10186
  related_source_ids: relatedSourceIds
@@ -9870,6 +10220,7 @@ function buildCommunitySummaryPage(input) {
9870
10220
  backlinks: ["graph:report"],
9871
10221
  schemaHash: input.schemaHash,
9872
10222
  sourceHashes: {},
10223
+ sourceSemanticHashes: {},
9873
10224
  relatedPageIds: uniqueStrings2(["graph:report", ...communityPageIds]),
9874
10225
  relatedNodeIds: input.community.nodeIds,
9875
10226
  relatedSourceIds,
@@ -9906,7 +10257,8 @@ function buildProjectsIndex(projectPages, schemaHash, metadata) {
9906
10257
  managed_by: metadata.managedBy,
9907
10258
  backlinks: [],
9908
10259
  schema_hash: schemaHash,
9909
- source_hashes: {}
10260
+ source_hashes: {},
10261
+ source_semantic_hashes: {}
9910
10262
  }
9911
10263
  );
9912
10264
  }
@@ -9958,7 +10310,8 @@ function buildProjectIndex(input) {
9958
10310
  managed_by: input.metadata.managedBy,
9959
10311
  backlinks: [],
9960
10312
  schema_hash: input.schemaHash,
9961
- source_hashes: {}
10313
+ source_hashes: {},
10314
+ source_semantic_hashes: {}
9962
10315
  }
9963
10316
  );
9964
10317
  }
@@ -9989,6 +10342,7 @@ function buildOutputPage(input) {
9989
10342
  backlinks,
9990
10343
  schema_hash: input.schemaHash,
9991
10344
  source_hashes: {},
10345
+ source_semantic_hashes: {},
9992
10346
  related_page_ids: relatedPageIds,
9993
10347
  related_node_ids: relatedNodeIds,
9994
10348
  related_source_ids: relatedSourceIds,
@@ -10013,6 +10367,7 @@ function buildOutputPage(input) {
10013
10367
  backlinks,
10014
10368
  schemaHash: input.schemaHash,
10015
10369
  sourceHashes: {},
10370
+ sourceSemanticHashes: {},
10016
10371
  relatedPageIds,
10017
10372
  relatedNodeIds,
10018
10373
  relatedSourceIds,
@@ -10115,6 +10470,7 @@ function buildExploreHubPage(input) {
10115
10470
  backlinks,
10116
10471
  schema_hash: input.schemaHash,
10117
10472
  source_hashes: {},
10473
+ source_semantic_hashes: {},
10118
10474
  related_page_ids: relatedPageIds,
10119
10475
  related_node_ids: relatedNodeIds,
10120
10476
  related_source_ids: relatedSourceIds,
@@ -10139,6 +10495,7 @@ function buildExploreHubPage(input) {
10139
10495
  backlinks,
10140
10496
  schemaHash: input.schemaHash,
10141
10497
  sourceHashes: {},
10498
+ sourceSemanticHashes: {},
10142
10499
  relatedPageIds,
10143
10500
  relatedNodeIds,
10144
10501
  relatedSourceIds,
@@ -10437,6 +10794,9 @@ function normalizeSourceHashes(value) {
10437
10794
  Object.entries(value).filter((entry) => typeof entry[0] === "string" && typeof entry[1] === "string")
10438
10795
  );
10439
10796
  }
10797
/**
 * Normalize a `source_semantic_hashes` frontmatter value.
 *
 * Semantic hashes are normalized exactly like content hashes, so this simply
 * delegates to `normalizeSourceHashes`.
 *
 * @param {*} value - Raw frontmatter value.
 * @returns {*} Whatever `normalizeSourceHashes` returns for `value`.
 */
function normalizeSourceSemanticHashes(value) {
  const semanticHashes = normalizeSourceHashes(value);
  return semanticHashes;
}
10440
10800
  function normalizePageStatus(value, fallback = "active") {
10441
10801
  return value === "draft" || value === "candidate" || value === "active" || value === "archived" ? value : fallback;
10442
10802
  }
@@ -10565,6 +10925,7 @@ function parseStoredPage(relativePath, content, defaults = {}) {
10565
10925
  backlinks,
10566
10926
  schemaHash: typeof parsed.data.schema_hash === "string" ? parsed.data.schema_hash : "",
10567
10927
  sourceHashes: normalizeSourceHashes(parsed.data.source_hashes),
10928
+ sourceSemanticHashes: normalizeSourceSemanticHashes(parsed.data.source_semantic_hashes),
10568
10929
  relatedPageIds,
10569
10930
  relatedNodeIds,
10570
10931
  relatedSourceIds,
@@ -10618,6 +10979,7 @@ async function loadInsightPages(wikiDir) {
10618
10979
  backlinks,
10619
10980
  schemaHash: typeof parsed.data.schema_hash === "string" ? parsed.data.schema_hash : "",
10620
10981
  sourceHashes: normalizeSourceHashes(parsed.data.source_hashes),
10982
+ sourceSemanticHashes: normalizeSourceSemanticHashes(parsed.data.source_semantic_hashes),
10621
10983
  relatedPageIds,
10622
10984
  relatedNodeIds,
10623
10985
  relatedSourceIds,
@@ -10718,6 +11080,7 @@ async function loadSavedOutputPages(wikiDir) {
10718
11080
  backlinks,
10719
11081
  schemaHash: typeof parsed.data.schema_hash === "string" ? parsed.data.schema_hash : "",
10720
11082
  sourceHashes: normalizeSourceHashes(parsed.data.source_hashes),
11083
+ sourceSemanticHashes: normalizeSourceSemanticHashes(parsed.data.source_semantic_hashes),
10721
11084
  relatedPageIds,
10722
11085
  relatedNodeIds,
10723
11086
  relatedSourceIds,
@@ -12194,11 +12557,13 @@ function aggregateItems(analyses, kind) {
12194
12557
  name: item.name,
12195
12558
  descriptions: [],
12196
12559
  sourceAnalyses: [],
12197
- sourceHashes: {}
12560
+ sourceHashes: {},
12561
+ sourceSemanticHashes: {}
12198
12562
  };
12199
12563
  existing.descriptions.push(item.description);
12200
12564
  existing.sourceAnalyses.push(analysis);
12201
12565
  existing.sourceHashes[analysis.sourceId] = analysis.sourceHash;
12566
+ existing.sourceSemanticHashes[analysis.sourceId] = analysis.semanticHash;
12202
12567
  grouped.set(key, existing);
12203
12568
  }
12204
12569
  }
@@ -12220,6 +12585,7 @@ function emptyGraphPage(input) {
12220
12585
  backlinks: [],
12221
12586
  schemaHash: input.schemaHash,
12222
12587
  sourceHashes: input.sourceHashes,
12588
+ sourceSemanticHashes: input.sourceSemanticHashes ?? {},
12223
12589
  relatedPageIds: [],
12224
12590
  relatedNodeIds: [],
12225
12591
  relatedSourceIds: [],
@@ -12384,6 +12750,7 @@ async function syncVaultArtifacts(rootDir, input) {
12384
12750
  nodeIds: [analysis.code.moduleId, ...analysis.code.symbols.map((symbol) => symbol.id)],
12385
12751
  schemaHash: sourceSchemaHash,
12386
12752
  sourceHashes: { [manifest.sourceId]: manifest.contentHash },
12753
+ sourceSemanticHashes: { [manifest.sourceId]: manifest.semanticHash },
12387
12754
  confidence: 1
12388
12755
  }) : null;
12389
12756
  const preview = emptyGraphPage({
@@ -12402,6 +12769,7 @@ async function syncVaultArtifacts(rootDir, input) {
12402
12769
  ],
12403
12770
  schemaHash: sourceSchemaHash,
12404
12771
  sourceHashes: { [manifest.sourceId]: manifest.contentHash },
12772
+ sourceSemanticHashes: { [manifest.sourceId]: manifest.semanticHash },
12405
12773
  confidence: 1
12406
12774
  });
12407
12775
  const sourceRecord = await buildManagedGraphPage(
@@ -12518,6 +12886,7 @@ async function syncVaultArtifacts(rootDir, input) {
12518
12886
  aggregate.descriptions,
12519
12887
  aggregate.sourceAnalyses,
12520
12888
  aggregate.sourceHashes,
12889
+ aggregate.sourceSemanticHashes,
12521
12890
  schemaHash,
12522
12891
  metadata,
12523
12892
  relativePath,
@@ -12765,6 +13134,7 @@ async function syncVaultArtifacts(rootDir, input) {
12765
13134
  projectConfigHash: projectConfigHash(config),
12766
13135
  analyses: Object.fromEntries(input.analyses.map((analysis) => [analysis.sourceId, analysisSignature(analysis)])),
12767
13136
  sourceHashes: Object.fromEntries(input.manifests.map((manifest) => [manifest.sourceId, manifest.contentHash])),
13137
+ sourceSemanticHashes: Object.fromEntries(input.manifests.map((manifest) => [manifest.sourceId, manifest.semanticHash])),
12768
13138
  sourceProjects: input.sourceProjects,
12769
13139
  outputHashes: input.outputHashes,
12770
13140
  insightHashes: input.insightHashes,
@@ -13230,6 +13600,7 @@ function emptyCompileState() {
13230
13600
  projectConfigHash: "",
13231
13601
  analyses: {},
13232
13602
  sourceHashes: {},
13603
+ sourceSemanticHashes: {},
13233
13604
  sourceProjects: {},
13234
13605
  outputHashes: {},
13235
13606
  insightHashes: {},
@@ -13659,7 +14030,8 @@ async function initVault(rootDir, options = {}) {
13659
14030
  managed_by: "human",
13660
14031
  backlinks: [],
13661
14032
  schema_hash: "",
13662
- source_hashes: {}
14033
+ source_hashes: {},
14034
+ source_semantic_hashes: {}
13663
14035
  }
13664
14036
  )
13665
14037
  );
@@ -13682,7 +14054,8 @@ async function initVault(rootDir, options = {}) {
13682
14054
  managed_by: "system",
13683
14055
  backlinks: [],
13684
14056
  schema_hash: "",
13685
- source_hashes: {}
14057
+ source_hashes: {},
14058
+ source_semantic_hashes: {}
13686
14059
  })
13687
14060
  );
13688
14061
  await writeFileIfChanged(
@@ -13704,7 +14077,8 @@ async function initVault(rootDir, options = {}) {
13704
14077
  managed_by: "system",
13705
14078
  backlinks: [],
13706
14079
  schema_hash: "",
13707
- source_hashes: {}
14080
+ source_hashes: {},
14081
+ source_semantic_hashes: {}
13708
14082
  })
13709
14083
  );
13710
14084
  if (options.obsidian) {
@@ -13745,7 +14119,7 @@ async function compileVault(rootDir, options = {}) {
13745
14119
  );
13746
14120
  const nextProjectConfigHash = projectConfigHash(config);
13747
14121
  const projectConfigChanged = !previousState || previousState.projectConfigHash !== nextProjectConfigHash;
13748
- const previousSourceHashes = previousState?.sourceHashes ?? {};
14122
+ const previousSourceHashes = previousState?.sourceSemanticHashes ?? previousState?.sourceHashes ?? {};
13749
14123
  const previousAnalyses = previousState?.analyses ?? {};
13750
14124
  const previousSourceProjects = previousState?.sourceProjects ?? {};
13751
14125
  const previousOutputHashes = previousState?.outputHashes ?? {};
@@ -13760,7 +14134,7 @@ async function compileVault(rootDir, options = {}) {
13760
14134
  const dirty = [];
13761
14135
  const clean = [];
13762
14136
  for (const manifest of manifests) {
13763
- const hashChanged = previousSourceHashes[manifest.sourceId] !== manifest.contentHash;
14137
+ const hashChanged = previousSourceHashes[manifest.sourceId] !== manifest.semanticHash;
13764
14138
  const noAnalysis = !previousAnalyses[manifest.sourceId];
13765
14139
  const projectId = sourceProjects[manifest.sourceId] ?? null;
13766
14140
  const projectChanged = (previousSourceProjects[manifest.sourceId] ?? null) !== projectId;
@@ -14470,9 +14844,11 @@ function structuralLintFindings(_rootDir, paths, graph, schemas, manifests, sour
14470
14844
  relatedPageIds: [page.id]
14471
14845
  });
14472
14846
  }
14473
- for (const [sourceId, knownHash] of Object.entries(page.sourceHashes)) {
14847
+ const freshnessHashes = Object.keys(page.sourceSemanticHashes).length ? page.sourceSemanticHashes : page.sourceHashes;
14848
+ for (const [sourceId, knownHash] of Object.entries(freshnessHashes)) {
14474
14849
  const manifest = manifestMap.get(sourceId);
14475
- if (manifest && manifest.contentHash !== knownHash) {
14850
+ const manifestHash = manifest?.semanticHash ?? manifest?.contentHash;
14851
+ if (manifestHash && manifestHash !== knownHash) {
14476
14852
  findings.push({
14477
14853
  severity: "warning",
14478
14854
  code: "stale_page",
@@ -14611,7 +14987,7 @@ async function bootstrapDemo(rootDir, input) {
14611
14987
  }
14612
14988
 
14613
14989
  // src/mcp.ts
14614
- var SERVER_VERSION = "0.2.0";
14990
+ var SERVER_VERSION = "0.2.2";
14615
14991
  async function createMcpServer(rootDir) {
14616
14992
  const server = new McpServer({
14617
14993
  name: "swarmvault",
@@ -15984,6 +16360,124 @@ import { promisify } from "util";
15984
16360
  import matter10 from "gray-matter";
15985
16361
  import mime2 from "mime-types";
15986
16362
 
16363
+ // src/graph-presentation.ts
16364
// Default node count above which the viewer graph is presented as a sampled overview.
var OVERVIEW_THRESHOLD = 5000;
// Default number of nodes an overview sample aims to keep.
var OVERVIEW_NODE_BUDGET = 1500;
16366
/**
 * Build the sort key used to rank a node for sampling (smaller sorts first).
 *
 * @param {object} node - Graph node with `id`, `label` and optional `degree` / `bridgeScore`.
 * @param {Set} pinnedNodeIds - Ids that must rank ahead of everything else.
 * @returns {Array} `[pinnedRank, -degree, -bridgeScore, label, id]`, where
 *   `pinnedRank` is 0 for pinned nodes and 1 otherwise, and a missing degree
 *   or bridge score counts as 0.
 */
function nodePriority(node, pinnedNodeIds) {
  const pinnedRank = pinnedNodeIds.has(node.id) ? 0 : 1;
  const degree = node.degree ?? 0;
  const bridgeScore = node.bridgeScore ?? 0;
  // Negated so that larger degree / bridge score sorts earlier.
  return [pinnedRank, -degree, -bridgeScore, node.label, node.id];
}
16369
/**
 * Lexicographically compare two sort-key arrays.
 *
 * Positions are walked left to right and the first pair that is not strictly
 * equal decides the outcome: two numbers compare by subtraction, anything else
 * compares as strings via `localeCompare` (null / undefined count as "").
 *
 * @param {Array} left - First key.
 * @param {Array} right - Second key.
 * @returns {number} Negative, zero or positive, suitable for `Array.prototype.sort`.
 */
function compareTuples(left, right) {
  const span = Math.max(left.length, right.length);
  let position = 0;
  while (position < span) {
    const a = left[position];
    const b = right[position];
    position += 1;
    if (a !== b) {
      const bothNumeric = typeof a === "number" && typeof b === "number";
      return bothNumeric ? a - b : String(a ?? "").localeCompare(String(b ?? ""));
    }
  }
  return 0;
}
16384
/**
 * Keep only the hyperedges that still connect something after sampling.
 *
 * @param {Array} hyperedges - Hyperedges, each with a `nodeIds` array.
 * @param {Set} sampledNodeIds - Ids of the nodes that were kept.
 * @returns {Array} The hyperedges that have at least two `nodeIds` entries
 *   present in `sampledNodeIds`.
 */
function survivingHyperedges(hyperedges, sampledNodeIds) {
  return hyperedges.filter((hyperedge) => {
    const sampledMembers = hyperedge.nodeIds.reduce(
      (count, nodeId) => sampledNodeIds.has(nodeId) ? count + 1 : count,
      0
    );
    return sampledMembers >= 2;
  });
}
16387
/**
 * Collect the node ids a graph report singles out, so sampling can keep them.
 *
 * Ids are gathered, in order, from `report.godNodes`, `report.bridgeNodes`
 * (each entry's `nodeId`) and `report.surprisingConnections` (each entry's
 * `sourceNodeId` and `targetNodeId`).
 *
 * The report is parsed from a JSON file on disk, so any of these lists may be
 * missing or malformed (for example in a report written by another version).
 * Such lists are treated as empty instead of throwing, so a partial report
 * degrades to "fewer pinned nodes" rather than breaking the viewer.
 *
 * @param {object|null|undefined} report - Parsed graph report, if any.
 * @returns {Set} Pinned node ids; empty when there is no report.
 */
function pinnedNodeIdsForReport(report) {
  const pinned = /* @__PURE__ */ new Set();
  if (!report) {
    return pinned;
  }
  const listOf = (value) => Array.isArray(value) ? value : [];
  for (const node of listOf(report.godNodes)) {
    pinned.add(node.nodeId);
  }
  for (const node of listOf(report.bridgeNodes)) {
    pinned.add(node.nodeId);
  }
  for (const connection of listOf(report.surprisingConnections)) {
    pinned.add(connection.sourceNodeId);
    pinned.add(connection.targetNodeId);
  }
  return pinned;
}
16397
/**
 * Choose which node ids to show in an overview of a large graph.
 *
 * Selection order:
 *   1. Every pinned id from the report that exists in the graph.
 *   2. Communities, ordered by most first-party members, then most members,
 *      then label, then id; inside a community nodes are taken in
 *      `nodePriority` order.
 *   3. If the budget is still not met, the remaining graph nodes in
 *      `nodePriority` order.
 *
 * Pinned nodes are always kept, so the result can exceed `nodeBudget` when the
 * report pins more nodes than the budget allows.
 *
 * @param {object} graph - Graph with `nodes` and optional `communities`.
 * @param {object|null|undefined} report - Graph report used to pin nodes.
 * @param {number} [nodeBudget=OVERVIEW_NODE_BUDGET] - Target number of nodes.
 * @returns {Set} Ids of the selected nodes.
 */
function sampleGraphNodes(graph, report, nodeBudget = OVERVIEW_NODE_BUDGET) {
  const pinned = pinnedNodeIdsForReport(report);
  const nodeById2 = new Map(graph.nodes.map((node) => [node.id, node]));
  const selected = new Set([...pinned].filter((nodeId) => nodeById2.has(nodeId)));
  const byPriority = (left, right) => compareTuples(nodePriority(left, pinned), nodePriority(right, pinned));
  // Adds nodes in the given order until the budget is hit; pinned nodes sort
  // first, so stopping at the first unpinned node over budget is sufficient.
  const takeUntilBudget = (orderedNodes) => {
    for (const node of orderedNodes) {
      if (selected.size >= nodeBudget && !pinned.has(node.id)) {
        break;
      }
      selected.add(node.id);
    }
  };
  // Resolve members and build each community's sort key once, rather than
  // re-resolving both sides inside the comparator on every comparison.
  const rankedCommunities = [...graph.communities ?? []].map((community) => {
    const members = community.nodeIds.map((nodeId) => nodeById2.get(nodeId)).filter((node) => Boolean(node));
    const firstPartyCount = members.filter((node) => node.sourceClass === "first_party").length;
    return {
      members,
      sortKey: [-firstPartyCount, -members.length, community.label, community.id]
    };
  }).sort((left, right) => compareTuples(left.sortKey, right.sortKey));
  for (const { members } of rankedCommunities) {
    takeUntilBudget(members.sort(byPriority));
    if (selected.size >= nodeBudget) {
      break;
    }
  }
  if (selected.size < nodeBudget) {
    takeUntilBudget([...graph.nodes].sort(byPriority));
  }
  return selected;
}
16433
/**
 * Prepare the graph payload sent to the viewer, attaching a `presentation`
 * summary and, for large graphs, trimming it to a sampled overview.
 *
 * The graph is returned whole (mode "full") when `options.full` is truthy or
 * the node count does not exceed the threshold. Otherwise (mode "overview")
 * nodes are sampled with `sampleGraphNodes`; edges are kept only when both
 * endpoints were sampled, hyperedges only when `survivingHyperedges` keeps
 * them, and communities are narrowed to their sampled members and dropped
 * when none remain.
 *
 * @param {object} graph - Graph with `nodes`, `edges` and optional
 *   `hyperedges` / `communities`.
 * @param {object} [options]
 * @param {number} [options.threshold=OVERVIEW_THRESHOLD] - Largest node count still shown in full.
 * @param {number} [options.nodeBudget=OVERVIEW_NODE_BUDGET] - Node budget for the overview sample.
 * @param {boolean} [options.full] - Force the full graph regardless of size.
 * @param {object|null} [options.report] - Graph report used to pin nodes while sampling.
 * @returns {object} A copy of `graph` with a `presentation` block and, in
 *   overview mode, replaced `nodes`, `edges`, `hyperedges` and `communities`.
 */
function buildViewerGraphArtifact(graph, options = {}) {
  const threshold = options.threshold ?? OVERVIEW_THRESHOLD;
  const nodeBudget = options.nodeBudget ?? OVERVIEW_NODE_BUDGET;
  const totalCommunities = graph.communities?.length ?? 0;
  // Both modes report the same fields; only the mode and displayed counts differ.
  const describePresentation = (mode, displayedNodes, displayedEdges, displayedCommunities) => ({
    mode,
    threshold,
    nodeBudget,
    totalNodes: graph.nodes.length,
    displayedNodes,
    totalEdges: graph.edges.length,
    displayedEdges,
    totalCommunities,
    displayedCommunities
  });
  const showEverything = options.full || graph.nodes.length <= threshold;
  if (showEverything) {
    return {
      ...graph,
      presentation: describePresentation("full", graph.nodes.length, graph.edges.length, totalCommunities)
    };
  }
  const sampledNodeIds = sampleGraphNodes(graph, options.report, nodeBudget);
  const isSampled = (nodeId) => sampledNodeIds.has(nodeId);
  const nodes = graph.nodes.filter((node) => isSampled(node.id));
  const edges = graph.edges.filter((edge) => isSampled(edge.source) && isSampled(edge.target));
  const hyperedges = survivingHyperedges(graph.hyperedges ?? [], sampledNodeIds);
  const communities = [];
  for (const community of graph.communities ?? []) {
    const nodeIds = community.nodeIds.filter(isSampled);
    if (nodeIds.length > 0) {
      communities.push({ ...community, nodeIds });
    }
  }
  return {
    ...graph,
    nodes,
    edges,
    hyperedges,
    communities,
    presentation: describePresentation("overview", nodes.length, edges.length, communities.length)
  };
}
16480
+
15987
16481
  // src/watch.ts
15988
16482
  import path26 from "path";
15989
16483
  import process3 from "process";
@@ -16449,7 +16943,7 @@ async function ensureViewerDist(viewerDistDir) {
16449
16943
  await execFileAsync("pnpm", ["build"], { cwd: viewerProjectDir });
16450
16944
  }
16451
16945
  }
16452
- async function startGraphServer(rootDir, port) {
16946
+ async function startGraphServer(rootDir, port, options = {}) {
16453
16947
  const { config, paths } = await loadVaultConfig(rootDir);
16454
16948
  const effectivePort = port ?? config.viewer.port;
16455
16949
  await ensureViewerDist(paths.viewerDistDir);
@@ -16461,8 +16955,16 @@ async function startGraphServer(rootDir, port) {
16461
16955
  response.end(JSON.stringify({ error: "Graph artifact not found. Run `swarmvault compile` first." }));
16462
16956
  return;
16463
16957
  }
16958
+ const graph = await readJsonFile(paths.graphPath);
16959
+ if (!graph) {
16960
+ response.writeHead(404, { "content-type": "application/json" });
16961
+ response.end(JSON.stringify({ error: "Graph artifact not found. Run `swarmvault compile` first." }));
16962
+ return;
16963
+ }
16964
+ const reportPath = path27.join(paths.wikiDir, "graph", "report.json");
16965
+ const report = await readJsonFile(reportPath) ?? null;
16464
16966
  response.writeHead(200, { "content-type": "application/json" });
16465
- response.end(await fs22.readFile(paths.graphPath, "utf8"));
16967
+ response.end(JSON.stringify(buildViewerGraphArtifact(graph, { report, full: options.full ?? false })));
16466
16968
  return;
16467
16969
  }
16468
16970
  if (url.pathname === "/api/graph/query") {
@@ -16638,7 +17140,7 @@ async function startGraphServer(rootDir, port) {
16638
17140
  }
16639
17141
  };
16640
17142
  }
16641
- async function exportGraphHtml(rootDir, outputPath) {
17143
+ async function exportGraphHtml(rootDir, outputPath, options = {}) {
16642
17144
  const { paths } = await loadVaultConfig(rootDir);
16643
17145
  const graph = await readJsonFile(paths.graphPath);
16644
17146
  if (!graph) {
@@ -16682,7 +17184,11 @@ async function exportGraphHtml(rootDir, outputPath) {
16682
17184
  const script = await fs22.readFile(scriptPath, "utf8");
16683
17185
  const style = stylePath && await fileExists(stylePath) ? await fs22.readFile(stylePath, "utf8") : "";
16684
17186
  const report = await readJsonFile(path27.join(paths.wikiDir, "graph", "report.json"));
16685
- const embeddedData = JSON.stringify({ graph, pages: pages.filter(Boolean), report }, null, 2).replace(/</g, "\\u003c");
17187
+ const embeddedData = JSON.stringify(
17188
+ { graph: buildViewerGraphArtifact(graph, { report, full: options.full ?? false }), pages: pages.filter(Boolean), report },
17189
+ null,
17190
+ 2
17191
+ ).replace(/</g, "\\u003c");
16686
17192
  const html = [
16687
17193
  "<!doctype html>",
16688
17194
  '<html lang="en">',