@kage-core/kage-graph-mcp 1.1.20 → 1.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/kernel.js CHANGED
@@ -42,6 +42,7 @@ exports.indexesDir = indexesDir;
42
42
  exports.graphDir = graphDir;
43
43
  exports.graphRegistryDir = graphRegistryDir;
44
44
  exports.codeGraphDir = codeGraphDir;
45
+ exports.structuralIndexDir = structuralIndexDir;
45
46
  exports.branchesDir = branchesDir;
46
47
  exports.reviewDir = reviewDir;
47
48
  exports.publicBundleDir = publicBundleDir;
@@ -63,6 +64,8 @@ exports.catalogDomainNodeCount = catalogDomainNodeCount;
63
64
  exports.ensureMemoryDirs = ensureMemoryDirs;
64
65
  exports.loadApprovedPackets = loadApprovedPackets;
65
66
  exports.loadPendingPackets = loadPendingPackets;
67
+ exports.buildStructuralFileForWorker = buildStructuralFileForWorker;
68
+ exports.buildStructuralIndex = buildStructuralIndex;
66
69
  exports.writeLspSymbolIndex = writeLspSymbolIndex;
67
70
  exports.writeCodeIndex = writeCodeIndex;
68
71
  exports.buildCodeGraph = buildCodeGraph;
@@ -115,7 +118,9 @@ exports.changelog = changelog;
115
118
  const node_crypto_1 = require("node:crypto");
116
119
  const node_child_process_1 = require("node:child_process");
117
120
  const node_fs_1 = require("node:fs");
121
+ const node_os_1 = require("node:os");
118
122
  const node_path_1 = require("node:path");
123
+ const node_worker_threads_1 = require("node:worker_threads");
119
124
  const ts = __importStar(require("typescript"));
120
125
  const index_js_1 = require("./registry/index.js");
121
126
  exports.PACKET_SCHEMA_VERSION = 2;
@@ -286,6 +291,9 @@ function graphRegistryDir(projectDir) {
286
291
  function codeGraphDir(projectDir) {
287
292
  return (0, node_path_1.join)(memoryRoot(projectDir), "code_graph");
288
293
  }
294
/**
 * Resolves the directory that holds a project's structural index.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {string} `memoryRoot(projectDir)` joined with "structural".
 */
function structuralIndexDir(projectDir) {
    const root = memoryRoot(projectDir);
    return (0, node_path_1.join)(root, "structural");
}
289
297
  function branchesDir(projectDir) {
290
298
  return (0, node_path_1.join)(memoryRoot(projectDir), "branches");
291
299
  }
@@ -1300,10 +1308,11 @@ const CODE_EXTENSIONS = new Set([
1300
1308
  ".swift",
1301
1309
  ]);
1302
1310
  const MAX_CODE_FILE_BYTES = positiveIntEnv("KAGE_MAX_CODE_FILE_BYTES", 512 * 1024);
1303
- const MAX_CODE_GRAPH_FILES = positiveIntEnv("KAGE_MAX_CODE_GRAPH_FILES", 2000);
1304
- const MAX_CODE_GRAPH_SYMBOLS = positiveIntEnv("KAGE_MAX_CODE_GRAPH_SYMBOLS", 25000);
1305
1311
  const MAX_CODE_GRAPH_CALLS = positiveIntEnv("KAGE_MAX_CODE_GRAPH_CALLS", 50000);
1306
1312
  const MAX_CODE_GRAPH_CALLS_PER_FILE = positiveIntEnv("KAGE_MAX_CODE_GRAPH_CALLS_PER_FILE", 250);
1313
+ const MAX_STRUCTURAL_EXTRACT_FILE_BYTES = positiveIntEnv("KAGE_MAX_STRUCTURAL_EXTRACT_FILE_BYTES", MAX_CODE_FILE_BYTES);
1314
+ const MAX_STRUCTURAL_WORKERS = positiveIntEnv("KAGE_STRUCTURAL_WORKERS", Math.max(1, Math.min(8, (0, node_os_1.availableParallelism)() - 1)));
1315
+ const MIN_STRUCTURAL_PARALLEL_FILES = positiveIntEnv("KAGE_STRUCTURAL_PARALLEL_MIN_FILES", 64);
1307
1316
  const CONFIG_NAMES = new Set([
1308
1317
  "package.json",
1309
1318
  "pyproject.toml",
@@ -1420,11 +1429,9 @@ function emptyCodeIndexManifest(projectDir) {
1420
1429
  project_dir: projectDir,
1421
1430
  repo_key: repoKey(projectDir),
1422
1431
  generated_at: nowIso(),
1423
- mode: "quick",
1432
+ mode: "structural",
1424
1433
  limits: {
1425
- max_file_bytes: MAX_CODE_FILE_BYTES,
1426
- max_files: MAX_CODE_GRAPH_FILES,
1427
- max_symbols: MAX_CODE_GRAPH_SYMBOLS,
1434
+ max_extract_file_bytes: MAX_STRUCTURAL_EXTRACT_FILE_BYTES,
1428
1435
  max_calls: MAX_CODE_GRAPH_CALLS,
1429
1436
  max_calls_per_file: MAX_CODE_GRAPH_CALLS_PER_FILE,
1430
1437
  },
@@ -1447,10 +1454,283 @@ function emptyCodeIndexManifest(projectDir) {
1447
1454
  function codeIndexManifestPath(projectDir) {
1448
1455
  return (0, node_path_1.join)(codeGraphDir(projectDir), "index-manifest.json");
1449
1456
  }
1450
- function codeIndexSelection(projectDir) {
1451
- const candidates = [];
1452
- const deferred = [];
1457
/**
 * Persists the code index manifest as JSON at the project's manifest path.
 *
 * @param {string} projectDir - Project root directory.
 * @param {object} manifest - Manifest object to serialise.
 */
function writeCodeIndexManifest(projectDir, manifest) {
    const target = codeIndexManifestPath(projectDir);
    writeJson(target, manifest);
}
1460
/**
 * Loads the code index manifest for a project.
 *
 * Falls back to a freshly built empty manifest when the file is missing or
 * when reading/patching it throws. A loaded manifest without a truthy `cache`
 * field gets zeroed hit/miss counters.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {object} The stored manifest, or an empty one.
 */
function readCodeIndexManifest(projectDir) {
    const manifestFile = codeIndexManifestPath(projectDir);
    if ((0, node_fs_1.existsSync)(manifestFile)) {
        try {
            const loaded = readJson(manifestFile);
            if (!loaded.cache) {
                loaded.cache = { hits: 0, misses: 0 };
            }
            return loaded;
        }
        catch {
            // Unreadable manifest: fall through to the empty default below.
        }
    }
    return emptyCodeIndexManifest(projectDir);
}
1474
/**
 * Derives a code index manifest from a structural index.
 *
 * Files whose `extraction` is "metadata-only" are reported as deferred with
 * the reason "over_structural_extract_file_size_limit"; coverage numbers are
 * copied from `structural.manifest.files`.
 *
 * @param {string} projectDir - Project root directory.
 * @param {object} structural - Structural index (`files`, `manifest`).
 * @param {string} fingerprint - Fingerprint to record on the manifest.
 * @param {object} cache - Cache statistics to record on the manifest.
 * @returns {object} The populated manifest.
 */
function codeIndexManifestFromStructural(projectDir, structural, fingerprint, cache) {
    const manifest = emptyCodeIndexManifest(projectDir);
    const deferred = [];
    for (const file of structural.files) {
        if (file.extraction !== "metadata-only") {
            continue;
        }
        deferred.push({
            path: file.path,
            size_bytes: file.size_bytes,
            reason: "over_structural_extract_file_size_limit",
        });
    }
    const counts = structural.manifest.files;
    manifest.mode = "structural";
    manifest.coverage = {
        indexable_files: counts.total,
        indexed_files: counts.indexed,
        deferred_files: deferred.length,
        ignored_files: counts.ignored,
        coverage_percent: percent(counts.indexed, counts.total),
        complete: deferred.length === 0,
    };
    manifest.cache = cache;
    manifest.fingerprint = fingerprint;
    deferred.sort((left, right) => left.path.localeCompare(right.path));
    manifest.deferred_files = deferred;
    manifest.ignored_summary = structural.manifest.ignored_summary;
    return manifest;
}
1494
/**
 * Lists the files selected by the structural scan of a project.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {string[]} The `files` array produced by `scanStructuralFiles`.
 */
function listCodeFiles(projectDir) {
    const { files } = scanStructuralFiles(projectDir);
    return files;
}
1497
/**
 * Converts a structural file record into a code-graph file record.
 *
 * Metadata-only files get the parser label "metadata"; every other file gets
 * whatever `codeParser` reports for its path.
 *
 * @param {object} file - Structural file record.
 * @returns {object} Code-graph file record with a `file:`-prefixed id.
 */
function codeFileFromStructural(file) {
    const { path, language, kind, size_bytes, line_count, hash } = file;
    const id = `file:${slugify(path)}`;
    let parser = "metadata";
    if (file.extraction !== "metadata-only") {
        parser = codeParser(path);
    }
    return { id, path, language, parser, kind, size_bytes, line_count, hash };
}
1509
/**
 * Converts a structural symbol record into a code-graph symbol record.
 *
 * Nullish optional fields are defaulted: `export` to false, `end_line` to
 * null and `signature` to "<name>()".
 *
 * @param {object} symbol - Structural symbol record.
 * @returns {object} Code-graph symbol record.
 */
function codeSymbolFromStructural(symbol) {
    const { id, name, kind, path, language, parser, line } = symbol;
    const exported = symbol.export ?? false;
    const endLine = symbol.end_line ?? null;
    const signature = symbol.signature ?? `${name}()`;
    return {
        id,
        name,
        kind,
        path,
        language,
        parser,
        export: exported,
        line,
        end_line: endLine,
        signature,
    };
}
1523
/**
 * Builds a NUL-separated identity key for an import record.
 *
 * @param {object} item - Import record (`from_path`, `to_path`, `specifier`, `line`, `kind`).
 * @returns {string} Key made of the five fields; a nullish `to_path` contributes "".
 */
function importKey(item) {
    const parts = [item.from_path, item.to_path ?? "", item.specifier, item.line, item.kind];
    // Stringify through a template so null/undefined render exactly as they would when interpolated.
    return parts.map((part) => `${part}`).join("\0");
}
1526
/**
 * Produces the compact (artifact_format 2) representation of a code graph.
 *
 * Files, symbols and imports already present in the structural index are not
 * repeated; the artifact stores relative `refs` to the structural JSON files
 * plus only the differences: parser overrides, symbols and imports the
 * structural index does not contain. Calls, routes, tests and packages are
 * copied from the graph as they are.
 *
 * @param {string} projectDir - Project root directory.
 * @param {object} graph - Full code graph.
 * @param {object} structural - Structural index (`files`, `symbols`, `imports`).
 * @returns {object} Compact artifact.
 */
function compactCodeGraphArtifact(projectDir, graph, structural) {
    const knownFiles = new Map();
    for (const file of structural.files) {
        knownFiles.set(file.path, codeFileFromStructural(file));
    }
    const knownSymbols = new Map();
    for (const symbol of structural.symbols) {
        knownSymbols.set(symbol.id, codeSymbolFromStructural(symbol));
    }
    const knownImports = new Set();
    for (const item of structural.imports) {
        knownImports.add(importKey(item));
    }

    const fileParserOverrides = [];
    for (const file of graph.files) {
        if (knownFiles.get(file.path)?.parser !== file.parser) {
            fileParserOverrides.push([file.path, file.parser]);
        }
    }
    const symbolParserOverrides = [];
    const extraSymbols = [];
    for (const symbol of graph.symbols) {
        if (!knownSymbols.has(symbol.id)) {
            extraSymbols.push(symbol);
        }
        else if (knownSymbols.get(symbol.id)?.parser !== symbol.parser) {
            symbolParserOverrides.push([symbol.id, symbol.parser]);
        }
    }
    const extraImports = [];
    for (const item of graph.imports) {
        if (!knownImports.has(importKey(item))) {
            extraImports.push(item);
        }
    }

    // References are stored relative to the code-graph directory, with forward slashes.
    const refTo = (name) => (0, node_path_1.relative)(codeGraphDir(projectDir), (0, node_path_1.join)(structuralIndexDir(projectDir), name)).replace(/\\/g, "/");

    const artifact = {
        schema_version: 1,
        compact: true,
        artifact_format: 2,
        project_dir: graph.project_dir,
        repo_key: graph.repo_key,
        generated_at: graph.generated_at,
        repo_state: graph.repo_state,
        refs: {
            files: refTo("files.json"),
            symbols: refTo("symbols.json"),
            imports: refTo("imports.json"),
        },
    };
    // Optional sections are only emitted when non-empty.
    if (fileParserOverrides.length) {
        artifact.file_parser_overrides = fileParserOverrides;
    }
    if (symbolParserOverrides.length) {
        artifact.symbol_parser_overrides = symbolParserOverrides;
    }
    if (extraSymbols.length) {
        artifact.extra_symbols = extraSymbols;
    }
    if (extraImports.length) {
        artifact.extra_imports = extraImports;
    }
    artifact.calls = graph.calls;
    artifact.routes = graph.routes;
    artifact.tests = graph.tests;
    artifact.packages = graph.packages;
    return artifact;
}
1561
/**
 * Tells whether a value is a compact code-graph artifact of format 2.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} True only for objects with `compact === true` and `artifact_format === 2`.
 */
function isCompactCodeGraphArtifact(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    return value.compact === true && value.artifact_format === 2;
}
1564
/**
 * Expands a stored code-graph artifact into a full code graph.
 *
 * - An artifact flagged `compact` that is not a recognised compact artifact
 *   yields null.
 * - A non-compact artifact is returned unchanged.
 * - A compact artifact is rebuilt from the structural index (the one passed
 *   in, otherwise `readCurrentStructuralIndex(projectDir)`); null is returned
 *   when no index is available.
 *
 * Parser overrides are resolved through lookup tables built once, instead of
 * scanning the override list for every file and symbol. When an override list
 * names the same key twice the first entry wins, matching a linear search.
 *
 * @param {string} projectDir - Project root directory.
 * @param {object} artifact - Parsed contents of the stored graph artifact.
 * @param {object} [structural] - Structural index to hydrate from.
 * @returns {object|null} Full code graph, or null when it cannot be hydrated.
 */
function hydrateCodeGraphArtifact(projectDir, artifact, structural) {
    if (artifact.compact === true && !isCompactCodeGraphArtifact(artifact))
        return null;
    if (!isCompactCodeGraphArtifact(artifact))
        return artifact;
    const index = structural ?? readCurrentStructuralIndex(projectDir);
    if (!index)
        return null;
    // Build key -> parser tables; keep the first occurrence of each key.
    const overrideTable = (pairs) => {
        const table = new Map();
        for (const [key, parser] of pairs ?? []) {
            if (!table.has(key))
                table.set(key, parser);
        }
        return table;
    };
    const fileParsers = overrideTable(artifact.file_parser_overrides);
    const symbolParsers = overrideTable(artifact.symbol_parser_overrides);
    return {
        schema_version: 1,
        project_dir: artifact.project_dir,
        repo_key: artifact.repo_key,
        generated_at: artifact.generated_at,
        repo_state: artifact.repo_state,
        files: index.files.map(codeFileFromStructural).map((file) => {
            return fileParsers.has(file.path) ? { ...file, parser: fileParsers.get(file.path) } : file;
        }).sort((a, b) => a.path.localeCompare(b.path)),
        symbols: [
            ...index.symbols.map(codeSymbolFromStructural).map((symbol) => {
                return symbolParsers.has(symbol.id) ? { ...symbol, parser: symbolParsers.get(symbol.id) } : symbol;
            }),
            ...(artifact.extra_symbols ?? []),
        ].sort((a, b) => a.path.localeCompare(b.path) || a.line - b.line || a.name.localeCompare(b.name)),
        imports: [
            ...index.imports,
            ...(artifact.extra_imports ?? []),
        ].sort((a, b) => a.from_path.localeCompare(b.from_path) || a.line - b.line || a.specifier.localeCompare(b.specifier)),
        calls: artifact.calls ?? [],
        routes: artifact.routes ?? [],
        tests: artifact.tests ?? [],
        packages: artifact.packages ?? [],
    };
}
1599
/**
 * Deletes the per-section JSON files from the code-graph directory.
 *
 * Removal uses `force: true`, so files that do not exist are ignored.
 *
 * @param {string} projectDir - Project root directory.
 */
function removeLegacyCodeGraphSplits(projectDir) {
    const legacyNames = ["files.json", "symbols.json", "imports.json", "calls.json", "routes.json", "tests.json", "packages.json"];
    legacyNames.forEach((name) => {
        const target = (0, node_path_1.join)(codeGraphDir(projectDir), name);
        (0, node_fs_1.rmSync)(target, { force: true });
    });
}
1604
/**
 * Returns the cached code graph when it is still valid for `fingerprint`.
 *
 * The manifest fingerprint is compared before `graph.json` is read, so a
 * stale graph file is never parsed. Only compact artifacts are accepted; any
 * failure while reading or hydrating yields null.
 *
 * @param {string} projectDir - Project root directory.
 * @param {string} fingerprint - Fingerprint the cached graph must match.
 * @param {object} [structural] - Structural index forwarded to hydration.
 * @returns {object|null} Hydrated code graph, or null on any cache miss.
 */
function readCachedCodeGraph(projectDir, fingerprint, structural) {
    const path = (0, node_path_1.join)(codeGraphDir(projectDir), "graph.json");
    if (!(0, node_fs_1.existsSync)(path))
        return null;
    try {
        // Cheap staleness check first: skip parsing the graph when it cannot be used.
        if (readCodeIndexManifest(projectDir).fingerprint !== fingerprint)
            return null;
        const artifact = readJson(path);
        if (!isCompactCodeGraphArtifact(artifact))
            return null;
        return hydrateCodeGraphArtifact(projectDir, artifact, structural);
    }
    catch {
        return null;
    }
}
1620
/**
 * Path of the structural index manifest file.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {string} "manifest.json" inside the structural index directory.
 */
function structuralManifestPath(projectDir) {
    const indexDir = structuralIndexDir(projectDir);
    return (0, node_path_1.join)(indexDir, "manifest.json");
}
1623
/**
 * Directory of the per-file structural cache entries.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {string} "file-cache" inside the structural index directory.
 */
function structuralFileCacheDir(projectDir) {
    const indexDir = structuralIndexDir(projectDir);
    return (0, node_path_1.join)(indexDir, "file-cache");
}
1626
/**
 * Path of the packed structural file cache.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {string} "file-cache.json" inside the structural index directory.
 */
function structuralPackedFileCachePath(projectDir) {
    const indexDir = structuralIndexDir(projectDir);
    return (0, node_path_1.join)(indexDir, "file-cache.json");
}
1629
/**
 * Path of the cache entry for one file at one content hash.
 *
 * @param {string} projectDir - Project root directory.
 * @param {string} rel - Project-relative file path.
 * @param {string} hash - Content hash of the file.
 * @returns {string} "<slug(rel)>-<hash>.json" inside the file-cache directory.
 */
function structuralFileCachePath(projectDir, rel, hash) {
    const cacheDir = structuralFileCacheDir(projectDir);
    const fileName = `${slugify(rel)}-${hash}.json`;
    return (0, node_path_1.join)(cacheDir, fileName);
}
1632
/**
 * Builds a blank structural index manifest for a project.
 *
 * All counters start at zero, collections start empty, and `limits` records
 * the module-level structural extraction settings.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {object} Fresh manifest with provider "kage-structural".
 */
function emptyStructuralIndexManifest(projectDir) {
    const repo_key = repoKey(projectDir);
    const generated_at = nowIso();
    const limits = {
        max_extract_file_bytes: MAX_STRUCTURAL_EXTRACT_FILE_BYTES,
        max_workers: MAX_STRUCTURAL_WORKERS,
        min_parallel_files: MIN_STRUCTURAL_PARALLEL_FILES,
    };
    const files = { total: 0, indexed: 0, metadata_only: 0, ignored: 0 };
    const cache = { hits: 0, misses: 0 };
    return {
        schema_version: 1,
        project_dir: projectDir,
        repo_key,
        generated_at,
        provider: "kage-structural",
        limits,
        files,
        cache,
        symbols: 0,
        imports: 0,
        edges: 0,
        languages: {},
        worker_count: 0,
        ignored_summary: {},
        deleted_files: [],
        fingerprint: "",
        file_entries: {},
    };
}
1665
/**
 * Loads the structural index manifest for a project.
 *
 * An empty manifest is returned when the file is missing, cannot be read, or
 * does not carry schema version 1 with provider "kage-structural". Falsy
 * `file_entries` / `cache` fields on a loaded manifest are replaced with
 * empty defaults.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {object} The stored manifest, or an empty one.
 */
function readStructuralIndexManifest(projectDir) {
    const manifestFile = structuralManifestPath(projectDir);
    const blank = () => emptyStructuralIndexManifest(projectDir);
    if (!(0, node_fs_1.existsSync)(manifestFile)) {
        return blank();
    }
    try {
        const loaded = readJson(manifestFile);
        const recognised = loaded.schema_version === 1 && loaded.provider === "kage-structural";
        if (!recognised) {
            return blank();
        }
        if (!loaded.file_entries) {
            loaded.file_entries = {};
        }
        if (!loaded.cache) {
            loaded.cache = { hits: 0, misses: 0 };
        }
        return loaded;
    }
    catch {
        return blank();
    }
}
1683
/**
 * Persists the structural index manifest as JSON at the project's manifest path.
 *
 * @param {string} projectDir - Project root directory.
 * @param {object} manifest - Manifest object to serialise.
 */
function writeStructuralIndexManifest(projectDir, manifest) {
    const target = structuralManifestPath(projectDir);
    writeJson(target, manifest);
}
1686
/**
 * Reads the ignore patterns from `<projectDir>/.kageignore`.
 *
 * Lines are trimmed; blank lines and lines starting with "#" are dropped.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {string[]} Patterns in file order; empty when the file does not exist.
 */
function readKageIgnore(projectDir) {
    const ignoreFile = (0, node_path_1.join)(projectDir, ".kageignore");
    if (!(0, node_fs_1.existsSync)(ignoreFile)) {
        return [];
    }
    const patterns = [];
    for (const rawLine of (0, node_fs_1.readFileSync)(ignoreFile, "utf8").split(/\r?\n/)) {
        const line = rawLine.trim();
        if (line.length === 0 || line.startsWith("#")) {
            continue;
        }
        patterns.push(line);
    }
    return patterns;
}
1695
/**
 * Compiles a glob-style ignore pattern into an anchored regular expression.
 *
 * Supported wildcards:
 * - `**` matches any sequence of characters, including "/".
 * - `*`  matches any sequence of characters except "/".
 * - `?`  matches exactly one character except "/".
 *
 * Every other regex metacharacter is matched literally. `?` used to pass
 * through untranslated, where it acted as a regex quantifier: a pattern such
 * as "?foo*" produced an invalid expression and threw, and "a?b*" silently
 * matched the wrong paths.
 *
 * @param {string} pattern - Glob pattern (already normalised to "/" separators).
 * @returns {RegExp} Expression that must match the whole relative path.
 */
function wildcardPattern(pattern) {
    const source = pattern
        .replace(/[.+^${}()|[\]\\]/g, "\\$&")
        // Park "**" on a NUL placeholder so the single-"*" rule below cannot touch it.
        .replace(/\*\*/g, "\0")
        .replace(/\*/g, "[^/]*")
        .replace(/\?/g, "[^/]")
        .replace(/\0/g, ".*");
    return new RegExp(`^${source}$`);
}
1703
/**
 * Tests one ignore pattern against a project-relative path.
 *
 * The pattern is normalised first: backslashes become "/" and leading slashes
 * are removed. Then:
 * - an empty pattern never matches;
 * - a pattern ending in "/" matches that directory itself and every path
 *   that starts with it;
 * - a pattern containing "*" is matched through `wildcardPattern`;
 * - any other pattern matches the identical path, any path underneath it, or
 *   any path that has it as one of its segments.
 *
 * @param {string} rel - Project-relative path using "/" separators.
 * @param {string} pattern - Ignore pattern without a leading "!".
 * @returns {boolean} Whether the pattern matches the path.
 */
function kageIgnoreMatches(rel, pattern) {
    const glob = pattern.replace(/\\/g, "/").replace(/^\/+/, "");
    if (glob === "") {
        return false;
    }
    if (glob.endsWith("/")) {
        const dir = glob.slice(0, -1);
        return rel === dir || rel.startsWith(glob);
    }
    if (glob.includes("*")) {
        return wildcardPattern(glob).test(rel);
    }
    if (rel === glob) {
        return true;
    }
    if (rel.startsWith(`${glob}/`)) {
        return true;
    }
    return rel.split("/").includes(glob);
}
1713
/**
 * Decides whether a path is ignored by an ordered list of patterns.
 *
 * Patterns are applied in order and the last matching one wins: a matching
 * plain pattern marks the path ignored, a matching "!"-prefixed pattern
 * un-ignores it.
 *
 * @param {string} rel - Project-relative path.
 * @param {string[]} patterns - Ignore patterns in file order.
 * @returns {boolean} True when the path ends up ignored.
 */
function isKageIgnored(rel, patterns) {
    return patterns.reduce((ignored, pattern) => {
        const negated = pattern.startsWith("!");
        const body = negated ? pattern.slice(1) : pattern;
        if (!kageIgnoreMatches(rel, body)) {
            return ignored;
        }
        return !negated;
    }, false);
}
1726
/**
 * Tells whether a project-relative path is eligible for structural indexing.
 *
 * A path qualifies when its extension is in `CODE_EXTENSIONS`, its base name
 * is in `CONFIG_NAMES`, or it is exactly "README.md".
 *
 * @param {string} rel - Project-relative path.
 * @returns {boolean} Whether the file should be indexed.
 */
function isStructuralIndexable(rel) {
    const extension = extensionOf(rel);
    if (CODE_EXTENSIONS.has(extension)) {
        return true;
    }
    if (CONFIG_NAMES.has((0, node_path_1.basename)(rel))) {
        return true;
    }
    return rel === "README.md";
}
1730
+ function scanStructuralFiles(projectDir) {
1731
+ const files = [];
1453
1732
  const ignoredSummary = {};
1733
+ const ignorePatterns = readKageIgnore(projectDir);
1454
1734
  const ignore = (reason) => {
1455
1735
  ignoredSummary[reason] = (ignoredSummary[reason] ?? 0) + 1;
1456
1736
  };
@@ -1464,110 +1744,219 @@ function codeIndexSelection(projectDir) {
1464
1744
  ignore("generated_vendor_or_cache");
1465
1745
  continue;
1466
1746
  }
1747
+ if (isKageIgnored(rel, ignorePatterns)) {
1748
+ ignore("kageignore");
1749
+ continue;
1750
+ }
1467
1751
  const stats = (0, node_fs_1.statSync)(absolutePath);
1468
1752
  if (stats.isDirectory()) {
1469
1753
  visit(absolutePath);
1470
1754
  continue;
1471
1755
  }
1472
- const extension = extensionOf(rel);
1473
- const indexable = CODE_EXTENSIONS.has(extension) || CONFIG_NAMES.has((0, node_path_1.basename)(rel)) || rel === "README.md";
1474
- if (!indexable) {
1756
+ if (!isStructuralIndexable(rel)) {
1475
1757
  ignore("unsupported_file_type");
1476
1758
  continue;
1477
1759
  }
1478
- if (stats.size > MAX_CODE_FILE_BYTES) {
1479
- deferred.push({ path: rel, size_bytes: stats.size, reason: "over_quick_file_size_limit" });
1480
- continue;
1481
- }
1482
- candidates.push(absolutePath);
1760
+ files.push(absolutePath);
1483
1761
  }
1484
1762
  };
1485
1763
  visit(projectDir);
1486
- const sorted = candidates.sort((a, b) => codeFilePriority(projectDir, a) - codeFilePriority(projectDir, b) || a.localeCompare(b));
1487
- const indexableFiles = sorted.length + deferred.length;
1488
- const files = sorted.slice(0, MAX_CODE_GRAPH_FILES);
1489
- for (const absolutePath of sorted.slice(MAX_CODE_GRAPH_FILES)) {
1490
- const rel = (0, node_path_1.relative)(projectDir, absolutePath).replace(/\\/g, "/");
1491
- deferred.push({ path: rel, size_bytes: (0, node_fs_1.statSync)(absolutePath).size, reason: "over_quick_file_count_limit" });
1492
- }
1493
- const manifest = emptyCodeIndexManifest(projectDir);
1494
- manifest.coverage = {
1495
- indexable_files: indexableFiles,
1496
- indexed_files: files.length,
1497
- deferred_files: deferred.length,
1498
- ignored_files: Object.values(ignoredSummary).reduce((sum, count) => sum + count, 0),
1499
- coverage_percent: percent(files.length, indexableFiles),
1500
- complete: deferred.length === 0,
1764
+ return {
1765
+ files: files.sort((a, b) => codeFilePriority(projectDir, a) - codeFilePriority(projectDir, b) || a.localeCompare(b)),
1766
+ ignoredSummary: Object.fromEntries(Object.entries(ignoredSummary).sort(([a], [b]) => a.localeCompare(b))),
1501
1767
  };
1502
- manifest.deferred_files = deferred.sort((a, b) => a.path.localeCompare(b.path));
1503
- manifest.ignored_summary = Object.fromEntries(Object.entries(ignoredSummary).sort(([a], [b]) => a.localeCompare(b)));
1504
- return { files, manifest };
1505
1768
  }
1506
- function writeCodeIndexManifest(projectDir, manifest) {
1507
- writeJson(codeIndexManifestPath(projectDir), manifest);
1769
/**
 * Counts the lines in a byte buffer.
 *
 * An empty buffer has 0 lines; otherwise the result is one more than the
 * number of LF (byte 10) occurrences, so a trailing newline counts as
 * starting a final empty line.
 *
 * Newlines are located with `indexOf` rather than by iterating every byte
 * through the iterator protocol.
 *
 * @param {Buffer|Uint8Array} buffer - Raw file bytes.
 * @returns {number} Line count.
 */
function countBufferLines(buffer) {
    if (buffer.length === 0)
        return 0;
    let lines = 1;
    let at = buffer.indexOf(10);
    while (at !== -1) {
        lines += 1;
        at = buffer.indexOf(10, at + 1);
    }
    return lines;
}
1779
/**
 * Derives up to 16 lowercase concept terms for a file.
 *
 * Terms come from the path (extension removed, split on "/", "_", "." and
 * "-", then on capital letters) and from symbol names (split on underscores,
 * non-word characters and capital letters). Terms shorter than three
 * characters and the generic words src/lib/test/spec/index are dropped before
 * de-duplication via `unique`.
 *
 * @param {string} rel - Project-relative path.
 * @param {Array<{name: string}>} symbols - Symbols found in the file.
 * @returns {string[]} At most 16 concept terms.
 */
function structuralConcepts(rel, symbols) {
    const genericTerms = ["src", "lib", "test", "spec", "index"];
    const rawTerms = [];
    const stem = rel.replace(/\.[^.]+$/, "");
    for (const segment of stem.split(/[\/_.-]+/)) {
        rawTerms.push(...segment.split(/(?=[A-Z])/));
    }
    for (const symbol of symbols) {
        rawTerms.push(...symbol.name.split(/[_\W]+|(?=[A-Z])/));
    }
    const kept = [];
    for (const raw of rawTerms) {
        const term = raw.toLowerCase();
        if (term.length >= 3 && !genericTerms.includes(term)) {
            kept.push(term);
        }
    }
    return unique(kept).slice(0, 16);
}
1790
/**
 * Collects coarse signal tags for a file.
 *
 * The file `kind` is always included. "readme" and "config" come from the
 * path; "http-route", "test-suite" and "auth" come from regular-expression
 * probes of the content and are skipped when `content` is falsy.
 *
 * @param {string} rel - Project-relative path.
 * @param {string|null|undefined} content - File text, if it was read.
 * @param {string} kind - File kind tag.
 * @returns {string[]} Sorted, de-duplicated signal tags.
 */
function structuralSignals(rel, content, kind) {
    const found = new Set([kind]);
    if (rel === "README.md") {
        found.add("readme");
    }
    if (CONFIG_NAMES.has((0, node_path_1.basename)(rel))) {
        found.add("config");
    }
    if (content) {
        const contentProbes = [
            [/\b(app|router)\.(get|post|put|patch|delete)\s*\(/, "http-route"],
            [/\b(describe|it|test)\s*\(/, "test-suite"],
            [/\b(auth|login|token|session)\b/i, "auth"],
        ];
        for (const [probe, signal] of contentProbes) {
            if (probe.test(content)) {
                found.add(signal);
            }
        }
    }
    return Array.from(found).sort();
}
1804
/**
 * Builds the graph edges implied by a file's symbols and imports.
 *
 * Every symbol yields a "contains" edge from the file. Every import yields an
 * "imports" edge: to another file (confidence "EXTRACTED", weight 1) when
 * `to_path` is set, otherwise to an `external:` node derived from the
 * specifier (confidence "AMBIGUOUS", weight 0.5).
 *
 * @param {string} rel - Project-relative path of the source file.
 * @param {object[]} symbols - Symbols declared in the file.
 * @param {object[]} imports - Imports found in the file.
 * @returns {object[]} Symbol edges followed by import edges.
 */
function structuralEdgesFromFacts(rel, symbols, imports) {
    const source = `file:${slugify(rel)}`;
    const edges = [];
    for (const symbol of symbols) {
        edges.push({
            source,
            target: symbol.id,
            relation: "contains",
            confidence: "EXTRACTED",
            source_file: rel,
            source_location: `L${symbol.line}`,
            weight: 1,
        });
    }
    for (const item of imports) {
        const resolved = Boolean(item.to_path);
        const target = resolved ? `file:${slugify(item.to_path)}` : `external:${slugify(item.specifier)}`;
        edges.push({
            source,
            target,
            relation: "imports",
            confidence: resolved ? "EXTRACTED" : "AMBIGUOUS",
            source_file: rel,
            source_location: `L${item.line}`,
            weight: resolved ? 1 : 0.5,
        });
    }
    return edges;
}
1509
- function readCodeIndexManifest(projectDir) {
1510
- const path = codeIndexManifestPath(projectDir);
1511
- if (!(0, node_fs_1.existsSync)(path))
1512
- return emptyCodeIndexManifest(projectDir);
1513
- try {
1514
- const manifest = readJson(path);
1515
- if (!manifest.cache)
1516
- manifest.cache = { hits: 0, misses: 0 };
1517
- return manifest;
1518
- }
1519
- catch {
1520
- return emptyCodeIndexManifest(projectDir);
1521
- }
1827
/**
 * Packs a cached structural file record into positional arrays (schema 2).
 *
 * Field positions are fixed by the three lists below; the symbol and import
 * paths are not stored per entry because the record-level `path` is kept.
 * Edges are not stored.
 *
 * @param {object} cached - Cached record with `path`, `hash`, `file`, `symbols`, `imports`.
 * @returns {object} Compact record with `file`, `symbols` and `imports` as arrays.
 */
function compactStructuralCachedFile(cached) {
    const fileFields = [
        "language",
        "kind",
        "size_bytes",
        "line_count",
        "hash",
        "mtime_ms",
        "extraction",
        "confidence",
        "top_symbols",
        "imports_preview",
        "signals",
        "concepts",
    ];
    const symbolFields = ["id", "name", "kind", "parser", "export", "line", "end_line", "signature", "confidence"];
    const importFields = ["to_path", "specifier", "imported", "kind", "parser", "line"];
    const pick = (record, fields) => fields.map((field) => record[field]);
    return {
        schema_version: 2,
        path: cached.path,
        hash: cached.hash,
        file: fileFields.map((field) => cached.file[field]),
        symbols: cached.symbols.map((symbol) => pick(symbol, symbolFields)),
        imports: cached.imports.map((item) => pick(item, importFields)),
    };
}
1523
- function listCodeFiles(projectDir) {
1524
- return codeIndexSelection(projectDir).files;
1867
/**
 * Rebuilds a full cached structural record from its packed (schema 2) form.
 *
 * Positional arrays are mapped back to named fields, the record-level path
 * and language are re-attached to each symbol, the path to each import, and
 * edges are regenerated with `structuralEdgesFromFacts`.
 *
 * @param {object} compact - Packed record with array-valued `file`, `symbols`, `imports`.
 * @returns {object|null} Expanded schema-1 record, or null when any of the
 *   three sections is not an array.
 */
function expandCompactStructuralCachedFile(compact) {
    const wellFormed = Array.isArray(compact.file) && Array.isArray(compact.symbols) && Array.isArray(compact.imports);
    if (!wellFormed) {
        return null;
    }
    const relPath = compact.path;
    const [language, kind, size_bytes, line_count, hash, mtime_ms, extraction, confidence, top_symbols, imports_preview, signals, concepts] = compact.file;
    const file = {
        schema_version: 1,
        path: relPath,
        language,
        kind,
        size_bytes,
        line_count,
        hash,
        mtime_ms,
        extraction,
        confidence,
        top_symbols,
        imports_preview,
        signals,
        concepts,
    };
    const symbols = [];
    for (const packed of compact.symbols) {
        symbols.push({
            id: packed[0],
            name: packed[1],
            kind: packed[2],
            path: relPath,
            language,
            parser: packed[3],
            export: packed[4],
            line: packed[5],
            end_line: packed[6],
            signature: packed[7],
            confidence: packed[8],
        });
    }
    const imports = [];
    for (const packed of compact.imports) {
        imports.push({
            from_path: relPath,
            to_path: packed[0],
            specifier: packed[1],
            imported: packed[2],
            kind: packed[3],
            parser: packed[4],
            line: packed[5],
        });
    }
    const edges = structuralEdgesFromFacts(relPath, symbols, imports);
    return {
        schema_version: 1,
        path: relPath,
        hash: compact.hash,
        file,
        symbols,
        imports,
        edges,
    };
}
1526
- function codeGraphStatFingerprint(projectDir, absoluteFiles) {
1527
- const entries = [
1528
- ...absoluteFiles,
1529
- ...externalIndexFiles(projectDir).map((index) => index.path),
1530
- ...["package.json", "requirements.txt", "go.mod", "Cargo.toml"]
1531
- .map((path) => (0, node_path_1.join)(projectDir, path))
1532
- .filter((path) => (0, node_fs_1.existsSync)(path)),
1533
- ]
1534
- .filter((path) => (0, node_fs_1.existsSync)(path))
1535
- .map((path) => {
1536
- const stats = (0, node_fs_1.statSync)(path);
1537
- return `${projectRelative(projectDir, path)}:${stats.size}:${Math.round(stats.mtimeMs)}`;
1538
- })
1539
- .sort();
1540
- return sha256Hex(entries.join("\n"));
1919
+ const packedStructuralCache = new Map();
1920
/**
 * Key under which a file's entry is stored in the packed structural cache.
 *
 * @param {string} rel - Project-relative file path.
 * @param {string} hash - Content hash of the file.
 * @returns {string} Path and hash separated by a NUL character.
 */
function structuralPackedCacheKey(rel, hash) {
    return [rel, hash].map((part) => `${part}`).join("\0");
}
1542
- function readCachedCodeGraph(projectDir, fingerprint) {
1543
- const path = (0, node_path_1.join)(codeGraphDir(projectDir), "graph.json");
1923
/**
 * Returns the entries of the packed structural file cache for a project.
 *
 * Parsed entries are memoised per resolved project directory and reused while
 * the pack file's mtime and size are unchanged. A pack that cannot be stat'ed
 * yields an empty result; a pack that cannot be parsed, or that has an
 * unexpected schema/provider, yields an empty result that is memoised as well,
 * so a corrupt pack is not re-read for every file lookup.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {object} Map-like object from packed cache key to compact record.
 */
function readPackedStructuralCache(projectDir) {
    const path = structuralPackedFileCachePath(projectDir);
    if (!(0, node_fs_1.existsSync)(path))
        return {};
    let stats;
    try {
        stats = (0, node_fs_1.statSync)(path);
    }
    catch {
        // The pack disappeared (or became unreadable) after the existence check: treat as no cache.
        return {};
    }
    const cacheKey = (0, node_path_1.resolve)(projectDir);
    const cached = packedStructuralCache.get(cacheKey);
    if (cached && cached.mtimeMs === stats.mtimeMs && cached.size === stats.size)
        return cached.entries;
    let entries = {};
    try {
        const packed = readJson(path);
        if (packed.schema_version === 1 && packed.provider === "kage-structural-file-cache" && packed.entries)
            entries = packed.entries;
    }
    catch {
        // Unparseable pack: fall back to no entries; the memo below is keyed by
        // mtime/size, so a rewritten pack is picked up again.
    }
    packedStructuralCache.set(cacheKey, { mtimeMs: stats.mtimeMs, size: stats.size, entries });
    return entries;
}
1556
- function fileFactCacheDir(projectDir) {
1557
- return (0, node_path_1.join)(codeGraphDir(projectDir), "file-cache");
1558
- }
1559
- function fileFactCachePath(projectDir, rel, hash) {
1560
- return (0, node_path_1.join)(fileFactCacheDir(projectDir), `${slugify(rel)}-${hash}.json`);
1561
- }
1562
- function readCachedFileFacts(projectDir, rel, hash) {
1563
- const path = fileFactCachePath(projectDir, rel, hash);
1942
+ function readCachedStructuralFile(projectDir, rel, hash) {
1943
+ const packed = readPackedStructuralCache(projectDir)[structuralPackedCacheKey(rel, hash)];
1944
+ if (packed) {
1945
+ const expanded = expandCompactStructuralCachedFile(packed);
1946
+ if (expanded && expanded.path === rel && expanded.hash === hash)
1947
+ return expanded;
1948
+ }
1949
+ const path = structuralFileCachePath(projectDir, rel, hash);
1564
1950
  if (!(0, node_fs_1.existsSync)(path))
1565
1951
  return null;
1566
1952
  try {
1567
- const cached = readJson(path);
1568
- if (cached.schema_version !== 1 || cached.path !== rel || cached.hash !== hash)
1953
+ const raw = readJson(path);
1954
+ const cached = raw.schema_version === 2 ? expandCompactStructuralCachedFile(raw) : raw;
1955
+ if (!cached || cached.schema_version !== 1 || cached.path !== rel || cached.hash !== hash)
1956
+ return null;
1957
+ if (!cached.file || !Array.isArray(cached.symbols) || !Array.isArray(cached.imports) || !Array.isArray(cached.edges))
1569
1958
  return null;
1570
- if (!cached.file || !Array.isArray(cached.symbols) || !Array.isArray(cached.imports))
1959
+ if (cached.symbols.some((symbol) => typeof symbol.signature !== "string" || typeof symbol.export !== "boolean"))
1571
1960
  return null;
1572
1961
  return cached;
1573
1962
  }
@@ -1575,40 +1964,311 @@ function readCachedFileFacts(projectDir, rel, hash) {
1575
1964
  return null;
1576
1965
  }
1577
1966
  }
1578
- function writeCachedFileFacts(projectDir, facts) {
1579
- ensureDir(fileFactCacheDir(projectDir));
1580
- writeJson(fileFactCachePath(projectDir, facts.path, facts.hash), facts);
1967
+ function writeStructuralFileCachePack(projectDir, results) {
1968
+ const entries = {};
1969
+ for (const result of results) {
1970
+ entries[structuralPackedCacheKey(result.cached.path, result.cached.hash)] = compactStructuralCachedFile(result.cached);
1971
+ }
1972
+ writeJson(structuralPackedFileCachePath(projectDir), {
1973
+ schema_version: 1,
1974
+ provider: "kage-structural-file-cache",
1975
+ entries: Object.fromEntries(Object.entries(entries).sort(([a], [b]) => a.localeCompare(b))),
1976
+ });
1977
+ packedStructuralCache.delete((0, node_path_1.resolve)(projectDir));
1978
+ (0, node_fs_1.rmSync)(structuralFileCacheDir(projectDir), { recursive: true, force: true });
1581
1979
  }
1582
- function buildFileFacts(projectDir, absolutePath, knownFiles) {
1980
+ function buildStructuralFile(projectDir, absolutePath, knownFiles, prior) {
1583
1981
  const rel = (0, node_path_1.relative)(projectDir, absolutePath).replace(/\\/g, "/");
1584
- const content = (0, node_fs_1.readFileSync)(absolutePath, "utf8");
1585
- const fullHash = (0, node_crypto_1.createHash)("sha256").update(content).digest("hex");
1586
- const cached = readCachedFileFacts(projectDir, rel, fullHash);
1982
+ const stats = (0, node_fs_1.statSync)(absolutePath);
1983
+ const priorEntry = prior.file_entries[rel];
1984
+ const canReuseHash = priorEntry && priorEntry.size_bytes === stats.size && Math.round(priorEntry.mtime_ms) === Math.round(stats.mtimeMs);
1985
+ let buffer = canReuseHash ? null : (0, node_fs_1.readFileSync)(absolutePath);
1986
+ let hash = canReuseHash ? priorEntry.hash : sha256Hex(buffer ?? "");
1987
+ let cached = readCachedStructuralFile(projectDir, rel, hash);
1988
+ if (!cached && !buffer) {
1989
+ buffer = (0, node_fs_1.readFileSync)(absolutePath);
1990
+ hash = sha256Hex(buffer);
1991
+ cached = readCachedStructuralFile(projectDir, rel, hash);
1992
+ }
1993
+ const entry = {
1994
+ path: rel,
1995
+ size_bytes: stats.size,
1996
+ mtime_ms: stats.mtimeMs,
1997
+ hash,
1998
+ extraction: stats.size <= MAX_STRUCTURAL_EXTRACT_FILE_BYTES ? "structural" : "metadata-only",
1999
+ };
1587
2000
  if (cached)
1588
- return { facts: cached, content, cacheHit: true };
2001
+ return { cached, entry, cacheHit: true };
2002
+ const content = stats.size <= MAX_STRUCTURAL_EXTRACT_FILE_BYTES ? (buffer ?? (0, node_fs_1.readFileSync)(absolutePath)).toString("utf8") : null;
2003
+ const language = codeLanguage(rel);
2004
+ const parser = content ? codeParser(rel) : "metadata";
2005
+ const rawSymbols = [];
2006
+ const rawImports = [];
2007
+ if (content) {
2008
+ if (TS_AST_EXTENSIONS.has(extensionOf(rel))) {
2009
+ rawSymbols.push(...extractSymbols(rel, content));
2010
+ rawImports.push(...extractImports(projectDir, rel, content, knownFiles));
2011
+ }
2012
+ else if (CODE_EXTENSIONS.has(extensionOf(rel))) {
2013
+ rawSymbols.push(...extractGenericSymbols(rel, content));
2014
+ rawImports.push(...extractGenericImports(projectDir, rel, content, knownFiles));
2015
+ }
2016
+ }
2017
+ const symbols = rawSymbols.map((symbol) => ({
2018
+ id: symbol.id,
2019
+ name: symbol.name,
2020
+ kind: symbol.kind,
2021
+ path: symbol.path,
2022
+ language: symbol.language,
2023
+ parser: symbol.parser,
2024
+ export: symbol.export,
2025
+ line: symbol.line,
2026
+ end_line: symbol.end_line,
2027
+ signature: symbol.signature,
2028
+ confidence: "EXTRACTED",
2029
+ }));
2030
+ const edges = structuralEdgesFromFacts(rel, symbols, rawImports);
1589
2031
  const file = {
1590
- id: `file:${slugify(rel)}`,
2032
+ schema_version: 1,
1591
2033
  path: rel,
1592
- language: codeLanguage(rel),
1593
- parser: codeParser(rel),
2034
+ language,
1594
2035
  kind: codeFileKind(rel),
1595
- size_bytes: Buffer.byteLength(content),
1596
- line_count: content.split(/\r?\n/).length,
1597
- hash: fullHash.slice(0, 16),
2036
+ size_bytes: stats.size,
2037
+ line_count: content ? content.split(/\r?\n/).length : countBufferLines(buffer ?? (0, node_fs_1.readFileSync)(absolutePath)),
2038
+ hash: hash.slice(0, 16),
2039
+ mtime_ms: stats.mtimeMs,
2040
+ extraction: entry.extraction,
2041
+ confidence: "EXTRACTED",
2042
+ top_symbols: symbols.slice(0, 12).map((symbol) => symbol.name),
2043
+ imports_preview: rawImports.slice(0, 20).map((item) => item.specifier),
2044
+ signals: structuralSignals(rel, content, codeFileKind(rel)),
2045
+ concepts: [],
1598
2046
  };
2047
+ file.concepts = structuralConcepts(rel, symbols);
2048
+ const next = { schema_version: 1, path: rel, hash, file, symbols, imports: rawImports, edges };
2049
+ return { cached: next, entry, cacheHit: false };
2050
+ }
2051
/**
 * Exported wrapper around buildStructuralFile that accepts `knownFiles` as
 * any iterable (e.g. a plain array) and converts it to a Set before
 * delegating.
 *
 * @param projectDir   Project root directory.
 * @param absolutePath Absolute path of the file to process.
 * @param knownFiles   Iterable of known file paths.
 * @param prior        Previous manifest, forwarded untouched.
 * @returns Whatever buildStructuralFile returns.
 */
function buildStructuralFileForWorker(projectDir, absolutePath, knownFiles, prior) {
    const knownFileSet = new Set(knownFiles);
    return buildStructuralFile(projectDir, absolutePath, knownFileSet, prior);
}
2054
/**
 * Location of the worker script: "structural-worker.js" in the same
 * directory as this module.
 *
 * @returns Path string built from __dirname.
 */
function structuralWorkerPath() {
    const workerFileName = "structural-worker.js";
    return (0, node_path_1.join)(__dirname, workerFileName);
}
2057
/**
 * Decides how many workers to use for `fileCount` files.
 *
 * Below MIN_STRUCTURAL_PARALLEL_FILES the answer is always 1. Otherwise it
 * is the file count capped at MAX_STRUCTURAL_WORKERS, never less than 1.
 *
 * @param fileCount Number of files to be processed.
 * @returns Worker count.
 */
function structuralWorkerCount(fileCount) {
    const belowParallelThreshold = fileCount < MIN_STRUCTURAL_PARALLEL_FILES;
    if (belowParallelThreshold) {
        return 1;
    }
    const capped = Math.min(MAX_STRUCTURAL_WORKERS, fileCount);
    return Math.max(1, capped);
}
2062
/**
 * Distributes `files` round-robin over at most `count` batches and drops
 * batches that stayed empty.
 *
 * `count` is normalised to an integer >= 1. Previously a count of 0, a
 * negative, fractional, or non-numeric count made `index % count` select a
 * bucket that does not exist, so `.push` threw on `undefined`.
 *
 * @param files Array of items to distribute (order within a batch follows input order).
 * @param count Requested number of batches.
 * @returns Array of non-empty batches.
 */
function splitStructuralBatches(files, count) {
    const requested = Math.floor(Number(count));
    const bucketCount = Number.isFinite(requested) && requested >= 1 ? requested : 1;
    const batches = Array.from({ length: bucketCount }, () => []);
    files.forEach((file, index) => batches[index % bucketCount].push(file));
    return batches.filter((batch) => batch.length > 0);
}
2067
/**
 * Processes every scanned file on the calling thread, in input order.
 *
 * @param projectDir   Project root directory.
 * @param scannedFiles Absolute paths of the files to process.
 * @param knownFiles   Set of known files, forwarded to buildStructuralFile.
 * @param previous     Previous manifest, forwarded to buildStructuralFile.
 * @returns `{ results, workerCount }` where workerCount is always 1.
 */
function buildStructuralFilesSerial(projectDir, scannedFiles, knownFiles, previous) {
    const results = [];
    for (const absolutePath of scannedFiles) {
        results.push(buildStructuralFile(projectDir, absolutePath, knownFiles, previous));
    }
    return { results, workerCount: 1 };
}
2073
/**
 * Processes the scanned files with worker threads, falling back to the
 * serial path when structuralWorkerCount() asks for a single worker.
 *
 * Each worker receives one batch plus an output path inside a per-call
 * scratch directory. The calling thread blocks on a shared Int32 counter
 * until it reaches the number of batches, then reads each worker's JSON
 * output. The worker script is not visible here; presumably it bumps the
 * counter after writing its file (TODO confirm).
 *
 * Cleanup is centralised in one `finally`: the scratch directory is always
 * removed, and workers are terminated whenever the batches did not all
 * report back (timeout, or a failure while starting workers). Previously a
 * throw during setup leaked the directory and any workers already started.
 *
 * @param projectDir   Project root directory.
 * @param scannedFiles Absolute paths of the files to process.
 * @param knownFiles   Set of known files (sent to workers as an array).
 * @param previous     Previous manifest, sent to workers as `prior`.
 * @returns `{ results, workerCount }` with workerCount = number of batches.
 * @throws Error on timeout or when a worker output reports `ok: false`.
 */
function buildStructuralFilesParallel(projectDir, scannedFiles, knownFiles, previous) {
    const workerCount = structuralWorkerCount(scannedFiles.length);
    if (workerCount <= 1)
        return buildStructuralFilesSerial(projectDir, scannedFiles, knownFiles, previous);
    // Overall wall-clock budget for all batches together.
    const timeoutMs = 10 * 60 * 1000;
    const outDir = (0, node_path_1.join)(structuralIndexDir(projectDir), "worker-output", `${process.pid}-${Date.now()}`);
    ensureDir(outDir);
    const workers = [];
    let allBatchesReported = false;
    try {
        const shared = new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT);
        const done = new Int32Array(shared);
        const known = [...knownFiles];
        const batches = splitStructuralBatches(scannedFiles, workerCount);
        batches.forEach((files, index) => {
            workers.push(new node_worker_threads_1.Worker(structuralWorkerPath(), {
                workerData: {
                    projectDir,
                    files,
                    knownFiles: known,
                    prior: previous,
                    outputPath: (0, node_path_1.join)(outDir, `worker-${index}.json`),
                    shared,
                },
            }));
        });
        const startedAt = Date.now();
        while (Atomics.load(done, 0) < batches.length) {
            const current = Atomics.load(done, 0);
            // Blocks this thread for up to one second or until the counter changes.
            Atomics.wait(done, 0, current, 1000);
            if (Date.now() - startedAt > timeoutMs) {
                throw new Error(`Structural index workers timed out after ${batches.length} batches`);
            }
        }
        allBatchesReported = true;
        const results = [];
        for (let index = 0; index < batches.length; index++) {
            const output = readJson((0, node_path_1.join)(outDir, `worker-${index}.json`));
            if (!output.ok)
                throw new Error(output.error ?? `Structural index worker ${index} failed`);
            results.push(...output.results);
        }
        return { results, workerCount: batches.length };
    }
    finally {
        if (!allBatchesReported) {
            for (const worker of workers)
                void worker.terminate();
        }
        (0, node_fs_1.rmSync)(outDir, { recursive: true, force: true });
    }
}
2118
/**
 * Renders the Markdown report for a structural index.
 *
 * Sections: coverage counters taken from the manifest and the index arrays,
 * the 20 most frequent languages, and the 20 most frequent concepts across
 * all files. Ties are broken alphabetically; an empty section prints "- none".
 *
 * @param index Structural index with `manifest`, `files`, `symbols`, `imports`, `edges`.
 * @returns The report as a single newline-joined string.
 */
function structuralReport(index) {
    // Highest count first, then name ascending; keep 20 bullet lines.
    const topTwentyLines = (counts) => Object.entries(counts)
        .sort(([nameA, countA], [nameB, countB]) => countB - countA || nameA.localeCompare(nameB))
        .slice(0, 20)
        .map(([name, count]) => `- ${name}: ${count}`);
    const orNone = (lines) => (lines.length ? lines : ["- none"]);
    const languageLines = topTwentyLines(index.manifest.languages);
    const conceptCounts = countBy(index.files.flatMap((file) => file.concepts), (concept) => concept);
    const conceptLines = topTwentyLines(conceptCounts);
    const lines = [];
    lines.push("# Kage Structural Index", "");
    lines.push("This is the full-repo structural index used for fast large-repo orientation. It is generated, cache-backed, and separate from repo memory packets.", "");
    lines.push("## Coverage", "");
    lines.push(`- Files: ${index.manifest.files.indexed}/${index.manifest.files.total}`);
    lines.push(`- Metadata-only files: ${index.manifest.files.metadata_only}`);
    lines.push(`- Ignored files: ${index.manifest.files.ignored}`);
    lines.push(`- Symbols: ${index.symbols.length}`);
    lines.push(`- Imports: ${index.imports.length}`);
    lines.push(`- Edges: ${index.edges.length}`);
    lines.push(`- Cache: ${index.manifest.cache.hits} hits, ${index.manifest.cache.misses} misses`);
    lines.push(`- Workers: ${index.manifest.worker_count}`);
    lines.push("");
    lines.push("## Languages", "", ...orNone(languageLines), "");
    lines.push("## Top Concepts", "", ...orNone(conceptLines), "");
    return lines.join("\n");
}
2153
+ function buildStructuralIndex(projectDir) {
2154
+ ensureMemoryDirs(projectDir);
2155
+ ensureDir(structuralIndexDir(projectDir));
2156
+ const previous = readStructuralIndexManifest(projectDir);
2157
+ const scanned = scanStructuralFiles(projectDir);
2158
+ const knownFiles = new Set(scanned.files.map((file) => (0, node_path_1.relative)(projectDir, file).replace(/\\/g, "/")));
2159
+ const files = [];
1599
2160
  const symbols = [];
1600
2161
  const imports = [];
1601
- if (TS_AST_EXTENSIONS.has(extensionOf(rel))) {
1602
- symbols.push(...extractSymbols(rel, content));
1603
- imports.push(...extractImports(projectDir, rel, content, knownFiles));
2162
+ const edges = [];
2163
+ const fileEntries = {};
2164
+ let hits = 0;
2165
+ let misses = 0;
2166
+ const builtFiles = buildStructuralFilesParallel(projectDir, scanned.files, knownFiles, previous);
2167
+ for (const built of builtFiles.results) {
2168
+ if (built.cacheHit)
2169
+ hits += 1;
2170
+ else
2171
+ misses += 1;
2172
+ files.push(built.cached.file);
2173
+ symbols.push(...built.cached.symbols);
2174
+ imports.push(...built.cached.imports);
2175
+ edges.push(...built.cached.edges);
2176
+ fileEntries[built.entry.path] = built.entry;
2177
+ }
2178
+ files.sort((a, b) => a.path.localeCompare(b.path));
2179
+ symbols.sort((a, b) => a.path.localeCompare(b.path) || a.line - b.line || a.name.localeCompare(b.name));
2180
+ imports.sort((a, b) => a.from_path.localeCompare(b.from_path) || a.line - b.line || a.specifier.localeCompare(b.specifier));
2181
+ edges.sort((a, b) => a.source.localeCompare(b.source) || a.target.localeCompare(b.target) || a.relation.localeCompare(b.relation));
2182
+ const fingerprint = sha256Hex(Object.values(fileEntries)
2183
+ .map((entry) => `${entry.path}:${entry.size_bytes}:${Math.round(entry.mtime_ms)}:${entry.hash}`)
2184
+ .sort()
2185
+ .join("\n"));
2186
+ const deletedFiles = Object.keys(previous.file_entries).filter((path) => !fileEntries[path]).sort();
2187
+ writeStructuralFileCachePack(projectDir, builtFiles.results);
2188
+ const manifest = {
2189
+ schema_version: 1,
2190
+ project_dir: projectDir,
2191
+ repo_key: repoKey(projectDir),
2192
+ generated_at: nowIso(),
2193
+ provider: "kage-structural",
2194
+ limits: {
2195
+ max_extract_file_bytes: MAX_STRUCTURAL_EXTRACT_FILE_BYTES,
2196
+ max_workers: MAX_STRUCTURAL_WORKERS,
2197
+ min_parallel_files: MIN_STRUCTURAL_PARALLEL_FILES,
2198
+ },
2199
+ files: {
2200
+ total: scanned.files.length,
2201
+ indexed: files.length,
2202
+ metadata_only: files.filter((file) => file.extraction === "metadata-only").length,
2203
+ ignored: Object.values(scanned.ignoredSummary).reduce((sum, count) => sum + count, 0),
2204
+ },
2205
+ cache: {
2206
+ hits,
2207
+ misses,
2208
+ },
2209
+ symbols: symbols.length,
2210
+ imports: imports.length,
2211
+ edges: edges.length,
2212
+ languages: countBy(files, (file) => file.language),
2213
+ worker_count: builtFiles.workerCount,
2214
+ ignored_summary: scanned.ignoredSummary,
2215
+ deleted_files: deletedFiles,
2216
+ fingerprint,
2217
+ file_entries: fileEntries,
2218
+ };
2219
+ const index = { manifest, files, symbols, imports, edges, report: "" };
2220
+ index.report = structuralReport(index);
2221
+ writeJson((0, node_path_1.join)(structuralIndexDir(projectDir), "files.json"), files);
2222
+ writeJson((0, node_path_1.join)(structuralIndexDir(projectDir), "symbols.json"), symbols);
2223
+ writeJson((0, node_path_1.join)(structuralIndexDir(projectDir), "imports.json"), imports);
2224
+ writeJson((0, node_path_1.join)(structuralIndexDir(projectDir), "edges.json"), edges);
2225
+ (0, node_fs_1.writeFileSync)((0, node_path_1.join)(structuralIndexDir(projectDir), "report.md"), index.report, "utf8");
2226
+ writeStructuralIndexManifest(projectDir, manifest);
2227
+ writeJson((0, node_path_1.join)(indexesDir(projectDir), "structural.json"), {
2228
+ schema_version: 1,
2229
+ provider: "kage-structural",
2230
+ files: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(structuralIndexDir(projectDir), "files.json")),
2231
+ symbols: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(structuralIndexDir(projectDir), "symbols.json")),
2232
+ imports: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(structuralIndexDir(projectDir), "imports.json")),
2233
+ edges: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(structuralIndexDir(projectDir), "edges.json")),
2234
+ report: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(structuralIndexDir(projectDir), "report.md")),
2235
+ manifest: (0, node_path_1.relative)(projectDir, structuralManifestPath(projectDir)),
2236
+ file_count: files.length,
2237
+ symbol_count: symbols.length,
2238
+ import_count: imports.length,
2239
+ edge_count: edges.length,
2240
+ cache_hits: hits,
2241
+ cache_misses: misses,
2242
+ worker_count: builtFiles.workerCount,
2243
+ });
2244
+ return index;
2245
+ }
2246
+ function readCurrentStructuralIndex(projectDir) {
2247
+ const manifestPath = structuralManifestPath(projectDir);
2248
+ const filesPath = (0, node_path_1.join)(structuralIndexDir(projectDir), "files.json");
2249
+ const symbolsPath = (0, node_path_1.join)(structuralIndexDir(projectDir), "symbols.json");
2250
+ const importsPath = (0, node_path_1.join)(structuralIndexDir(projectDir), "imports.json");
2251
+ const edgesPath = (0, node_path_1.join)(structuralIndexDir(projectDir), "edges.json");
2252
+ if (![manifestPath, filesPath, symbolsPath, importsPath, edgesPath].every((path) => (0, node_fs_1.existsSync)(path)))
2253
+ return null;
2254
+ try {
2255
+ const manifest = readJson(manifestPath);
2256
+ if (manifest.schema_version !== 1 || manifest.provider !== "kage-structural")
2257
+ return null;
2258
+ return {
2259
+ manifest,
2260
+ files: readJson(filesPath),
2261
+ symbols: readJson(symbolsPath),
2262
+ imports: readJson(importsPath),
2263
+ edges: readJson(edgesPath),
2264
+ report: (0, node_fs_1.existsSync)((0, node_path_1.join)(structuralIndexDir(projectDir), "report.md"))
2265
+ ? (0, node_fs_1.readFileSync)((0, node_path_1.join)(structuralIndexDir(projectDir), "report.md"), "utf8")
2266
+ : "",
2267
+ };
1604
2268
  }
1605
- else if (CODE_EXTENSIONS.has(extensionOf(rel))) {
1606
- symbols.push(...extractGenericSymbols(rel, content));
1607
- imports.push(...extractGenericImports(projectDir, rel, content, knownFiles));
2269
+ catch {
2270
+ return null;
1608
2271
  }
1609
- const facts = { schema_version: 1, path: rel, hash: fullHash, file, symbols, imports };
1610
- writeCachedFileFacts(projectDir, facts);
1611
- return { facts, content, cacheHit: false };
1612
2272
  }
1613
2273
  function codeFilePriority(projectDir, absolutePath) {
1614
2274
  const rel = (0, node_path_1.relative)(projectDir, absolutePath).replace(/\\/g, "/");
@@ -2136,6 +2796,47 @@ function codeGraphInputHash(projectDir, absoluteFiles = listCodeFiles(projectDir
2136
2796
  ...fileInputEntries(projectDir, externalIndexFiles(projectDir).map((index) => index.path), "external_code_index"),
2137
2797
  ]);
2138
2798
  }
2799
/**
 * Code-graph input hash derived from a structural index: reads the
 * fingerprint stored in its manifest and delegates to
 * codeGraphInputHashFromStructuralFingerprint.
 */
function codeGraphInputHashFromStructural(projectDir, structural) {
    const { fingerprint } = structural.manifest;
    return codeGraphInputHashFromStructuralFingerprint(projectDir, fingerprint);
}
2802
/**
 * Combines a structural fingerprint with the external code-index files into
 * one graph input hash.
 *
 * @param projectDir  Project root directory.
 * @param fingerprint Structural fingerprint, recorded as the entry's `sha256`.
 * @returns Result of graphInputHash over the fingerprint entry followed by
 *          the "external_code_index" file entries.
 */
function codeGraphInputHashFromStructuralFingerprint(projectDir, fingerprint) {
    const fingerprintEntry = {
        kind: "code_graph_input",
        path: ".agent_memory/structural/fingerprint",
        sha256: fingerprint,
    };
    const externalIndexPaths = externalIndexFiles(projectDir).map((index) => index.path);
    const externalEntries = fileInputEntries(projectDir, externalIndexPaths, "external_code_index");
    return graphInputHash([fingerprintEntry, ...externalEntries]);
}
2808
/**
 * Recomputes the structural fingerprint from the files currently on disk.
 *
 * For each scanned file the hash recorded in `structural.manifest.file_entries`
 * is reused when size and rounded mtime are unchanged; otherwise the file is
 * read and hashed. One `path:size:mtime:hash` line is produced per file; the
 * lines are sorted, joined with newlines and hashed.
 *
 * @param projectDir Project root directory.
 * @param structural Previously stored structural index (only the manifest is read).
 * @returns Hex digest from sha256Hex.
 */
function currentStructuralFingerprint(projectDir, structural) {
    const scanned = scanStructuralFiles(projectDir);
    const lines = [];
    for (const absolutePath of scanned.files) {
        const rel = (0, node_path_1.relative)(projectDir, absolutePath).replace(/\\/g, "/");
        const stats = (0, node_fs_1.statSync)(absolutePath);
        const recorded = structural.manifest.file_entries[rel];
        const unchanged = recorded
            && recorded.size_bytes === stats.size
            && Math.round(recorded.mtime_ms) === Math.round(stats.mtimeMs);
        const hash = unchanged ? recorded.hash : sha256Hex((0, node_fs_1.readFileSync)(absolutePath));
        lines.push(`${rel}:${stats.size}:${Math.round(stats.mtimeMs)}:${hash}`);
    }
    lines.sort();
    return sha256Hex(lines.join("\n"));
}
2823
/**
 * Input hash for the code graph as of now.
 *
 * When a stored structural index can be read, the hash is derived from a
 * freshly computed structural fingerprint; otherwise it falls back to
 * codeGraphInputHash(projectDir).
 */
function currentCodeGraphInputHash(projectDir) {
    const structural = readCurrentStructuralIndex(projectDir);
    if (!structural) {
        return codeGraphInputHash(projectDir);
    }
    const fingerprint = currentStructuralFingerprint(projectDir, structural);
    return codeGraphInputHashFromStructuralFingerprint(projectDir, fingerprint);
}
2827
/**
 * Fingerprint over the structural index plus the external index files.
 *
 * Entries are `structural:<fingerprint>` and, for every external index file
 * that exists, `external:<project-relative path>:<size>:<rounded mtime>`.
 * The entries are sorted, joined with newlines and hashed.
 *
 * @param projectDir Project root directory.
 * @param structural Structural index (only `manifest.fingerprint` is read).
 * @returns Hex digest from sha256Hex.
 */
function codeGraphStructuralFingerprint(projectDir, structural) {
    const entries = [`structural:${structural.manifest.fingerprint}`];
    for (const index of externalIndexFiles(projectDir)) {
        const indexPath = index.path;
        if (!(0, node_fs_1.existsSync)(indexPath)) {
            continue;
        }
        const stats = (0, node_fs_1.statSync)(indexPath);
        entries.push(`external:${projectRelative(projectDir, indexPath)}:${stats.size}:${Math.round(stats.mtimeMs)}`);
    }
    return sha256Hex(entries.sort().join("\n"));
}
2139
2840
  function knowledgeGraphInputHash(projectDir, codeInputHash = codeGraphInputHash(projectDir)) {
2140
2841
  const packetEntries = loadPacketEntriesFromDir(packetsDir(projectDir))
2141
2842
  .filter((entry) => entry.packet.status === "approved")
@@ -2555,44 +3256,33 @@ function buildCodeGraph(projectDir, options = {}) {
2555
3256
  const head = gitHead(projectDir);
2556
3257
  const tree = gitTree(projectDir);
2557
3258
  const mergeBase = gitMergeBase(projectDir);
2558
- const selection = codeIndexSelection(projectDir);
2559
- const absoluteFiles = selection.files;
2560
- const fingerprint = codeGraphStatFingerprint(projectDir, absoluteFiles);
2561
- const cachedGraph = options.force ? null : readCachedCodeGraph(projectDir, fingerprint);
3259
+ const structural = buildStructuralIndex(projectDir);
3260
+ const fingerprint = codeGraphStructuralFingerprint(projectDir, structural);
3261
+ const cachedGraph = options.force ? null : readCachedCodeGraph(projectDir, fingerprint, structural);
2562
3262
  if (cachedGraph) {
2563
- selection.manifest.cache = { hits: absoluteFiles.length, misses: 0 };
2564
- selection.manifest.fingerprint = fingerprint;
2565
- writeCodeIndexManifest(projectDir, selection.manifest);
3263
+ const manifest = codeIndexManifestFromStructural(projectDir, structural, fingerprint, { hits: structural.files.length, misses: 0 });
3264
+ writeCodeIndexManifest(projectDir, manifest);
3265
+ removeLegacyCodeGraphSplits(projectDir);
2566
3266
  return cachedGraph;
2567
3267
  }
2568
- const inputHash = codeGraphInputHash(projectDir, absoluteFiles);
2569
- selection.manifest.fingerprint = fingerprint;
2570
- writeCodeIndexManifest(projectDir, selection.manifest);
2571
- const knownFiles = new Set(absoluteFiles.map((path) => (0, node_path_1.relative)(projectDir, path).replace(/\\/g, "/")));
2572
- const files = [];
2573
- const symbols = [];
2574
- const imports = [];
3268
+ const inputHash = codeGraphInputHashFromStructural(projectDir, structural);
3269
+ const files = structural.files.map(codeFileFromStructural);
3270
+ const symbols = structural.symbols.map(codeSymbolFromStructural);
3271
+ const imports = structural.imports.slice();
2575
3272
  const contents = new Map();
2576
- let cacheHits = 0;
2577
- let cacheMisses = 0;
2578
- for (const absolutePath of absoluteFiles) {
2579
- const { facts, content, cacheHit } = buildFileFacts(projectDir, absolutePath, knownFiles);
2580
- if (cacheHit)
2581
- cacheHits++;
2582
- else
2583
- cacheMisses++;
2584
- contents.set(facts.path, content);
2585
- files.push(facts.file);
2586
- symbols.push(...facts.symbols.slice(0, Math.max(0, MAX_CODE_GRAPH_SYMBOLS - symbols.length)));
2587
- imports.push(...facts.imports);
2588
- }
2589
- selection.manifest.cache = { hits: cacheHits, misses: cacheMisses };
2590
- writeCodeIndexManifest(projectDir, selection.manifest);
3273
+ for (const file of structural.files) {
3274
+ if (!TS_AST_EXTENSIONS.has(extensionOf(file.path)))
3275
+ continue;
3276
+ if (file.size_bytes > MAX_CODE_FILE_BYTES)
3277
+ continue;
3278
+ const absolutePath = (0, node_path_1.join)(projectDir, file.path);
3279
+ if ((0, node_fs_1.existsSync)(absolutePath))
3280
+ contents.set(file.path, (0, node_fs_1.readFileSync)(absolutePath, "utf8"));
3281
+ }
3282
+ writeCodeIndexManifest(projectDir, codeIndexManifestFromStructural(projectDir, structural, fingerprint, structural.manifest.cache));
2591
3283
  const externalFacts = loadExternalCodeFacts(projectDir);
2592
3284
  const fileByPath = new Map(files.map((file) => [file.path, file]));
2593
3285
  const addSymbol = (symbol) => {
2594
- if (symbols.length >= MAX_CODE_GRAPH_SYMBOLS)
2595
- return;
2596
3286
  if (!fileByPath.has(symbol.path))
2597
3287
  return;
2598
3288
  const file = fileByPath.get(symbol.path);
@@ -2658,17 +3348,105 @@ function buildCodeGraph(projectDir, options = {}) {
2658
3348
  tests: tests.sort((a, b) => a.test_path.localeCompare(b.test_path) || a.line - b.line),
2659
3349
  packages: extractPackages(projectDir),
2660
3350
  };
2661
- writeJson((0, node_path_1.join)(codeGraphDir(projectDir), "files.json"), graph.files);
2662
- writeJson((0, node_path_1.join)(codeGraphDir(projectDir), "symbols.json"), graph.symbols);
2663
- writeJson((0, node_path_1.join)(codeGraphDir(projectDir), "imports.json"), graph.imports);
2664
- writeJson((0, node_path_1.join)(codeGraphDir(projectDir), "calls.json"), graph.calls);
2665
- writeJson((0, node_path_1.join)(codeGraphDir(projectDir), "routes.json"), graph.routes);
2666
- writeJson((0, node_path_1.join)(codeGraphDir(projectDir), "tests.json"), graph.tests);
2667
- writeJson((0, node_path_1.join)(codeGraphDir(projectDir), "packages.json"), graph.packages);
2668
- writeJson((0, node_path_1.join)(codeGraphDir(projectDir), "graph.json"), graph);
3351
+ removeLegacyCodeGraphSplits(projectDir);
3352
+ writeJson((0, node_path_1.join)(codeGraphDir(projectDir), "graph.json"), compactCodeGraphArtifact(projectDir, graph, structural));
2669
3353
  graphMemoryCache.delete((0, node_path_1.resolve)(projectDir));
2670
3354
  return graph;
2671
3355
  }
3356
// Packet types accepted by isPreciseMemoryCodePacket.
const PRECISE_MEMORY_CODE_PACKET_TYPES = new Set([
    "bug_fix", "code_explanation", "constraint", "convention",
    "decision", "gotcha", "rationale",
]);
// Lower-cased symbol names rejected by meaningfulSymbolNameForMemoryLink.
const GENERIC_MEMORY_CODE_SYMBOL_NAMES = new Set([
    "app", "body", "code", "config", "context", "current", "data",
    "edge", "edges", "entity", "entities", "file", "files", "from",
    "graph", "id", "index", "indexes", "input", "item", "items",
    "memory", "name", "next", "node", "nodes", "output",
    "packet", "packets", "path", "paths", "project", "projectdir",
    "query", "result", "results", "root", "state", "status", "summary",
    "test", "tests", "title", "to", "type", "types", "value",
]);
// Per-packet caps on the number of symbol links and test links.
const MAX_PRECISE_SYMBOL_LINKS_PER_PACKET = 24;
const MAX_PRECISE_TEST_LINKS_PER_PACKET = 12;
3416
/** True when the packet's `type` is listed in PRECISE_MEMORY_CODE_PACKET_TYPES. */
function isPreciseMemoryCodePacket(packet) {
    const { type } = packet;
    return PRECISE_MEMORY_CODE_PACKET_TYPES.has(type);
}
3419
/**
 * Decides whether a symbol name is specific enough to be linked.
 *
 * The name is trimmed, lower-cased and stripped of everything except
 * `a-z`, `0-9`, `_` and `$`. It is rejected when the trimmed name or the
 * stripped form is shorter than 4 characters, when the stripped form is in
 * GENERIC_MEMORY_CODE_SYMBOL_NAMES, or when it contains no letter.
 *
 * @param name Symbol name (string).
 * @returns boolean
 */
function meaningfulSymbolNameForMemoryLink(name) {
    const trimmed = name.trim();
    // A too-short trimmed name collapses to "" so the single length check below rejects it.
    const compact = trimmed.length >= 4
        ? trimmed.toLowerCase().replace(/[^a-z0-9_$]/g, "")
        : "";
    if (compact.length < 4) {
        return false;
    }
    if (GENERIC_MEMORY_CODE_SYMBOL_NAMES.has(compact)) {
        return false;
    }
    return /[a-z]/i.test(compact);
}
3430
/**
 * Escapes regular-expression metacharacters in `value` by prefixing each
 * with a backslash, so the result can be embedded in a RegExp source.
 *
 * @param value String to escape.
 * @returns Escaped string.
 */
function escapeRegex(value) {
    const metacharacters = /[.*+?^${}()|[\]\\]/g;
    return value.replace(metacharacters, (character) => `\\${character}`);
}
3433
/**
 * Checks whether already lower-cased packet text mentions `identifier`.
 *
 * The identifier is trimmed and lower-cased. If it consists only of
 * `a-z0-9_$` it must occur as a whole token, i.e. not adjacent to another
 * character of that class; anything else is matched as a plain substring.
 *
 * @param packetTextLower Packet text, expected to be lower-case already.
 * @param identifier      Name or path to look for.
 * @returns boolean (false for an empty identifier).
 */
function packetTextMentionsIdentifier(packetTextLower, identifier) {
    const needle = identifier.trim().toLowerCase();
    if (needle.length === 0) {
        return false;
    }
    const isPlainToken = /^[a-z0-9_$]+$/.test(needle);
    if (!isPlainToken) {
        return packetTextLower.includes(needle);
    }
    const bounded = new RegExp(`(^|[^a-z0-9_$])${escapeRegex(needle)}([^a-z0-9_$]|$)`);
    return bounded.test(packetTextLower);
}
3442
/**
 * True when the symbol's name passes meaningfulSymbolNameForMemoryLink and
 * is mentioned in the (lower-cased) packet text.
 */
function symbolMatchesPacketText(packetTextLower, symbol) {
    if (!meaningfulSymbolNameForMemoryLink(symbol.name)) {
        return false;
    }
    return packetTextMentionsIdentifier(packetTextLower, symbol.name);
}
3445
/**
 * True when the packet text mentions the test's title, its test symbol, or
 * (when present) the symbol it covers. Checks run in that order and stop at
 * the first hit.
 */
function testMatchesPacketText(packetTextLower, test) {
    const mentions = (identifier) => packetTextMentionsIdentifier(packetTextLower, identifier);
    if (mentions(test.title)) {
        return true;
    }
    if (mentions(test.test_symbol)) {
        return true;
    }
    return Boolean(test.covers_symbol && mentions(test.covers_symbol));
}
2672
3450
  function buildKnowledgeGraph(projectDir, codeGraph = buildCodeGraph(projectDir)) {
2673
3451
  ensureMemoryDirs(projectDir);
2674
3452
  const packets = loadApprovedPackets(projectDir).sort((a, b) => a.id.localeCompare(b.id));
@@ -2882,15 +3660,20 @@ function buildKnowledgeGraph(projectDir, codeGraph = buildCodeGraph(projectDir))
2882
3660
  : packet.type === "decision" || packet.type === "rationale" || packet.type === "constraint"
2883
3661
  ? "informs_symbol"
2884
3662
  : "explains_symbol";
3663
+ let preciseSymbolLinks = 0;
2885
3664
  for (const symbol of codeGraph.symbols.filter((symbol) => packetPathSet.has(symbol.path))) {
2886
- if (packet.type !== "code_explanation" && !packetTextLower.includes(symbol.name.toLowerCase()))
3665
+ if (!isPreciseMemoryCodePacket(packet))
3666
+ continue;
3667
+ if (preciseSymbolLinks >= MAX_PRECISE_SYMBOL_LINKS_PER_PACKET)
3668
+ break;
3669
+ if (!symbolMatchesPacketText(packetTextLower, symbol))
2887
3670
  continue;
2888
3671
  const symbolEntityId = graphEntityId("symbol", symbol.id);
2889
3672
  addEntity(entities, {
2890
3673
  id: symbolEntityId,
2891
3674
  type: "symbol",
2892
3675
  name: symbol.name,
2893
- aliases: [symbol.id, symbol.path],
3676
+ aliases: [symbol.id],
2894
3677
  summary: `${symbol.kind} in ${symbol.path}:${symbol.line}`,
2895
3678
  first_seen_at: packet.created_at,
2896
3679
  last_seen_at: packet.updated_at,
@@ -2908,14 +3691,15 @@ function buildKnowledgeGraph(projectDir, codeGraph = buildCodeGraph(projectDir))
2908
3691
  commit: head,
2909
3692
  evidence: [episodeId],
2910
3693
  });
3694
+ preciseSymbolLinks += 1;
2911
3695
  }
2912
- for (const route of codeGraph.routes.filter((route) => packetPathSet.has(route.file_path) && packetTextLower.includes(route.path.toLowerCase()))) {
3696
+ for (const route of codeGraph.routes.filter((route) => isPreciseMemoryCodePacket(packet) && packetPathSet.has(route.file_path) && packetTextMentionsIdentifier(packetTextLower, route.path))) {
2913
3697
  const routeEntityId = graphEntityId("route", route.id);
2914
3698
  addEntity(entities, {
2915
3699
  id: routeEntityId,
2916
3700
  type: "route",
2917
3701
  name: `${route.method} ${route.path}`,
2918
- aliases: [route.id, route.file_path],
3702
+ aliases: [route.id],
2919
3703
  summary: `${route.framework} route in ${route.file_path}:${route.line}`,
2920
3704
  first_seen_at: packet.created_at,
2921
3705
  last_seen_at: packet.updated_at,
@@ -2934,13 +3718,20 @@ function buildKnowledgeGraph(projectDir, codeGraph = buildCodeGraph(projectDir))
2934
3718
  evidence: [episodeId],
2935
3719
  });
2936
3720
  }
3721
+ let preciseTestLinks = 0;
2937
3722
  for (const test of codeGraph.tests.filter((test) => packetPathSet.has(test.test_path) || Boolean(test.covers_path && packetPathSet.has(test.covers_path)))) {
3723
+ if (!isPreciseMemoryCodePacket(packet))
3724
+ continue;
3725
+ if (preciseTestLinks >= MAX_PRECISE_TEST_LINKS_PER_PACKET)
3726
+ break;
3727
+ if (!testMatchesPacketText(packetTextLower, test))
3728
+ continue;
2938
3729
  const testEntityId = graphEntityId("test", test.test_symbol);
2939
3730
  addEntity(entities, {
2940
3731
  id: testEntityId,
2941
3732
  type: "test",
2942
3733
  name: test.title,
2943
- aliases: [test.test_symbol, test.test_path],
3734
+ aliases: [test.test_symbol],
2944
3735
  summary: `Test in ${test.test_path}:${test.line}${test.covers_symbol ? ` covers ${test.covers_symbol}` : ""}`,
2945
3736
  first_seen_at: packet.created_at,
2946
3737
  last_seen_at: packet.updated_at,
@@ -2958,6 +3749,7 @@ function buildKnowledgeGraph(projectDir, codeGraph = buildCodeGraph(projectDir))
2958
3749
  commit: head,
2959
3750
  evidence: [episodeId],
2960
3751
  });
3752
+ preciseTestLinks += 1;
2961
3753
  }
2962
3754
  }
2963
3755
  const manifestCommands = npmScriptCommands(projectDir);
@@ -3011,10 +3803,47 @@ function buildKnowledgeGraph(projectDir, codeGraph = buildCodeGraph(projectDir))
3011
3803
  writeJson((0, node_path_1.join)(graphDir(projectDir), "episodes.json"), graph.episodes);
3012
3804
  writeJson((0, node_path_1.join)(graphDir(projectDir), "entities.json"), graph.entities);
3013
3805
  writeJson((0, node_path_1.join)(graphDir(projectDir), "edges.json"), graph.edges);
3014
- writeJson((0, node_path_1.join)(graphDir(projectDir), "graph.json"), graph);
3806
+ writeJson((0, node_path_1.join)(graphDir(projectDir), "graph.json"), compactKnowledgeGraphArtifact(projectDir, graph));
3015
3807
  graphMemoryCache.delete((0, node_path_1.resolve)(projectDir));
3016
3808
  return graph;
3017
3809
  }
3810
/**
 * Builds the compact form of a knowledge graph: the graph's header fields
 * plus `refs` pointing at episodes.json, entities.json and edges.json
 * (paths relative to the graph directory, forward slashes) instead of the
 * arrays themselves.
 *
 * @param projectDir Project root directory.
 * @param graph      Full knowledge graph.
 * @returns Plain object with `compact: true` and `refs`.
 */
function compactKnowledgeGraphArtifact(projectDir, graph) {
    const root = graphDir(projectDir);
    const refTo = (fileName) => (0, node_path_1.relative)(root, (0, node_path_1.join)(root, fileName)).replace(/\\/g, "/");
    const { project_dir, repo_key, generated_from_updated_at, repo_state } = graph;
    return {
        schema_version: 1,
        compact: true,
        project_dir,
        repo_key,
        generated_from_updated_at,
        repo_state,
        refs: {
            episodes: refTo("episodes.json"),
            entities: refTo("entities.json"),
            edges: refTo("edges.json"),
        },
    };
}
3825
/**
 * Type guard for the compact artifact form: a non-null object whose
 * `compact` is exactly `true` and whose `refs` is truthy.
 */
function isCompactKnowledgeGraphArtifact(value) {
    if (!value || typeof value !== "object") {
        return false;
    }
    return value.compact === true && Boolean(value.refs);
}
3828
/**
 * Expands a compact knowledge-graph artifact back into a full graph.
 *
 * A non-compact artifact is returned as-is. For a compact one, the three
 * referenced files are resolved against the graph directory; if any is
 * missing the result is null, otherwise they are loaded with readJson.
 *
 * @param projectDir Project root directory.
 * @param artifact   Parsed graph.json content.
 * @returns Full graph object, the original artifact, or null.
 */
function hydrateKnowledgeGraphArtifact(projectDir, artifact) {
    if (!isCompactKnowledgeGraphArtifact(artifact)) {
        return artifact;
    }
    const root = graphDir(projectDir);
    const partPaths = ["episodes", "entities", "edges"].map((part) => (0, node_path_1.join)(root, artifact.refs[part]));
    const anyMissing = partPaths.some((partPath) => !(0, node_fs_1.existsSync)(partPath));
    if (anyMissing) {
        return null;
    }
    const [episodesPath, entitiesPath, edgesPath] = partPaths;
    const { project_dir, repo_key, generated_from_updated_at, repo_state } = artifact;
    return {
        schema_version: 1,
        project_dir,
        repo_key,
        generated_from_updated_at,
        repo_state,
        episodes: readJson(episodesPath),
        entities: readJson(entitiesPath),
        edges: readJson(edgesPath),
    };
}
3018
3847
  function buildPacketIndexes(projectDir) {
3019
3848
  ensureMemoryDirs(projectDir);
3020
3849
  const packets = loadPacketsFromDir(packetsDir(projectDir)).sort((a, b) => a.id.localeCompare(b.id));
@@ -3066,8 +3895,17 @@ function readCurrentCodeGraph(projectDir, expectedInputHash) {
3066
3895
  if (!(0, node_fs_1.existsSync)(path))
3067
3896
  return null;
3068
3897
  try {
3069
- const graph = readJson(path);
3070
- const inputHash = expectedInputHash ?? codeGraphInputHash(projectDir, codeIndexSelection(projectDir).files);
3898
+ const artifact = readJson(path);
3899
+ const structural = expectedInputHash ? null : readCurrentStructuralIndex(projectDir);
3900
+ if (!expectedInputHash && !structural)
3901
+ return null;
3902
+ const inputHash = expectedInputHash ?? codeGraphInputHashFromStructuralFingerprint(projectDir, currentStructuralFingerprint(projectDir, structural));
3903
+ const graphInputHash = artifact.repo_state?.input_hash;
3904
+ if (graphInputHash !== inputHash)
3905
+ return null;
3906
+ const graph = hydrateCodeGraphArtifact(projectDir, artifact, structural ?? undefined);
3907
+ if (!graph)
3908
+ return null;
3071
3909
  if (graph.repo_state?.input_hash !== inputHash)
3072
3910
  return null;
3073
3911
  return graph;
@@ -3081,8 +3919,13 @@ function readCurrentKnowledgeGraph(projectDir, codeGraph, expectedInputHash) {
3081
3919
  if (!(0, node_fs_1.existsSync)(path))
3082
3920
  return null;
3083
3921
  try {
3084
- const graph = readJson(path);
3922
+ const artifact = readJson(path);
3085
3923
  const inputHash = expectedInputHash ?? knowledgeGraphInputHash(projectDir, codeGraph.repo_state.input_hash ?? codeGraphInputHash(projectDir));
3924
+ if (artifact.repo_state?.input_hash !== inputHash)
3925
+ return null;
3926
+ const graph = hydrateKnowledgeGraphArtifact(projectDir, artifact);
3927
+ if (!graph)
3928
+ return null;
3086
3929
  if (graph.repo_state?.input_hash !== inputHash)
3087
3930
  return null;
3088
3931
  return graph;
@@ -3091,14 +3934,14 @@ function readCurrentKnowledgeGraph(projectDir, codeGraph, expectedInputHash) {
3091
3934
  return null;
3092
3935
  }
3093
3936
  }
3094
- function graphFastFingerprint(projectDir, selection = codeIndexSelection(projectDir)) {
3937
+ function graphFastFingerprint(projectDir) {
3095
3938
  const packetPaths = (0, node_fs_1.existsSync)(packetsDir(projectDir))
3096
3939
  ? (0, node_fs_1.readdirSync)(packetsDir(projectDir))
3097
3940
  .filter((name) => name.endsWith(".json"))
3098
3941
  .map((name) => (0, node_path_1.join)(packetsDir(projectDir), name))
3099
3942
  : [];
3100
3943
  const paths = [
3101
- ...selection.files,
3944
+ ...scanStructuralFiles(projectDir).files,
3102
3945
  ...externalIndexFiles(projectDir).map((index) => index.path),
3103
3946
  ...packetPaths,
3104
3947
  ];
@@ -3112,14 +3955,16 @@ function graphFastFingerprint(projectDir, selection = codeIndexSelection(project
3112
3955
  return sha256Hex(entries.join("\n"));
3113
3956
  }
3114
3957
  function readCurrentGraphs(projectDir) {
3115
- const selection = codeIndexSelection(projectDir);
3116
- const fingerprint = graphFastFingerprint(projectDir, selection);
3958
+ const fingerprint = graphFastFingerprint(projectDir);
3117
3959
  const cacheKey = (0, node_path_1.resolve)(projectDir);
3118
3960
  const cached = graphMemoryCache.get(cacheKey);
3119
3961
  if (cached?.fingerprint === fingerprint) {
3120
3962
  return { codeGraph: cached.codeGraph, knowledgeGraph: cached.knowledgeGraph };
3121
3963
  }
3122
- const codeInputHash = codeGraphInputHash(projectDir, selection.files);
3964
+ const structural = readCurrentStructuralIndex(projectDir);
3965
+ if (!structural)
3966
+ return null;
3967
+ const codeInputHash = codeGraphInputHashFromStructuralFingerprint(projectDir, currentStructuralFingerprint(projectDir, structural));
3123
3968
  const knowledgeInputHash = knowledgeGraphInputHash(projectDir, codeInputHash);
3124
3969
  if (cached?.codeInputHash === codeInputHash && cached.knowledgeInputHash === knowledgeInputHash) {
3125
3970
  cached.fingerprint = fingerprint;
@@ -3143,6 +3988,7 @@ function currentOrBuildGraphs(projectDir) {
3143
3988
  (0, node_path_1.join)(indexesDir(projectDir), "by-path.json"),
3144
3989
  (0, node_path_1.join)(indexesDir(projectDir), "by-tag.json"),
3145
3990
  (0, node_path_1.join)(indexesDir(projectDir), "by-type.json"),
3991
+ (0, node_path_1.join)(indexesDir(projectDir), "structural.json"),
3146
3992
  (0, node_path_1.join)(indexesDir(projectDir), "graph.json"),
3147
3993
  (0, node_path_1.join)(indexesDir(projectDir), "code-graph.json"),
3148
3994
  ],
@@ -3169,13 +4015,11 @@ function buildGraphIndexes(projectDir, options = {}) {
3169
4015
  });
3170
4016
  writeJson(codeGraphIndexPath, {
3171
4017
  schema_version: codeGraph.schema_version,
3172
- files: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(codeGraphDir(projectDir), "files.json")),
3173
- symbols: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(codeGraphDir(projectDir), "symbols.json")),
3174
- imports: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(codeGraphDir(projectDir), "imports.json")),
3175
- calls: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(codeGraphDir(projectDir), "calls.json")),
3176
- routes: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(codeGraphDir(projectDir), "routes.json")),
3177
- tests: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(codeGraphDir(projectDir), "tests.json")),
3178
- packages: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(codeGraphDir(projectDir), "packages.json")),
4018
+ mode: "structural-references",
4019
+ graph: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(codeGraphDir(projectDir), "graph.json")),
4020
+ files: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(structuralIndexDir(projectDir), "files.json")),
4021
+ symbols: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(structuralIndexDir(projectDir), "symbols.json")),
4022
+ imports: (0, node_path_1.relative)(projectDir, (0, node_path_1.join)(structuralIndexDir(projectDir), "imports.json")),
3179
4023
  file_count: codeGraph.files.length,
3180
4024
  symbol_count: codeGraph.symbols.length,
3181
4025
  import_count: codeGraph.imports.length,
@@ -3191,7 +4035,7 @@ function buildGraphIndexes(projectDir, options = {}) {
3191
4035
  knowledgeGraph,
3192
4036
  });
3193
4037
  return {
3194
- indexes: [...written, graphIndexPath, codeGraphIndexPath],
4038
+ indexes: [...written, (0, node_path_1.join)(indexesDir(projectDir), "structural.json"), graphIndexPath, codeGraphIndexPath],
3195
4039
  codeGraph,
3196
4040
  knowledgeGraph,
3197
4041
  };
@@ -3755,13 +4599,24 @@ function scoreText(terms, text, boosts = []) {
3755
4599
  score += 1 + Math.min(occurrences, 4);
3756
4600
  if (firstIndex < 80)
3757
4601
  score += 1;
3758
- if (boosts.some((boost) => boost.toLowerCase().includes(term) || term.includes(boost.toLowerCase())))
3759
- score += 2;
4602
+ score += boosts.reduce((best, boost) => Math.max(best, boostTermScore(boost, term)), 0);
3760
4603
  }
3761
4604
  if (terms.length > 1 && terms.every((term) => haystack.includes(term)))
3762
4605
  score += 3;
3763
4606
  return score;
3764
4607
  }
4608
/**
 * Score how strongly a single boost string matches a query term.
 *
 * The boost is lower-cased before comparison; the term is compared as given.
 * Scoring tiers, checked in order:
 *   8 - the lower-cased boost equals the term;
 *   5 - `tokenize` of the lower-cased boost contains the term;
 *   2 - the term has at least 6 characters and occurs inside the boost, or
 *       the boost has at least 6 characters and occurs inside the term;
 *   0 - none of the above.
 *
 * @param {string} boost - Boost text such as a path, name or concept.
 * @param {string} term - Query term (presumably already lower-cased by the
 *   caller; verify against `tokenize`).
 * @returns {number} One of 8, 5, 2 or 0.
 */
function boostTermScore(boost, term) {
    const loweredBoost = boost.toLowerCase();
    if (loweredBoost === term) {
        return 8;
    }
    const boostTokens = tokenize(loweredBoost);
    if (boostTokens.includes(term)) {
        return 5;
    }
    // The length floor of 6 applies to whichever string is the contained one.
    const termInsideBoost = term.length >= 6 && loweredBoost.includes(term);
    const boostInsideTerm = loweredBoost.length >= 6 && term.includes(loweredBoost);
    return termInsideBoost || boostInsideTerm ? 2 : 0;
}
3765
4620
  function queryCodeGraph(projectDir, query, limit = 10, graph) {
3766
4621
  graph = graph ?? readCurrentCodeGraph(projectDir) ?? buildCodeGraph(projectDir);
3767
4622
  const terms = tokenize(query);
@@ -3808,6 +4663,46 @@ function queryCodeGraph(projectDir, query, limit = 10, graph) {
3808
4663
  const calls = graph.calls
3809
4664
  .filter((call) => symbolIds.has(call.to_symbol) || Boolean(call.from_symbol && symbolIds.has(call.from_symbol)))
3810
4665
  .slice(0, limit);
4666
+ const structuralIndex = readCurrentStructuralIndex(projectDir);
4667
+ const graphPaths = new Set(graph.files.map((file) => file.path));
4668
+ const graphSymbolIds = new Set(graph.symbols.map((symbol) => symbol.id));
4669
+ const structuralFiles = structuralIndex
4670
+ ? structuralIndex.files
4671
+ .map((file) => ({
4672
+ file,
4673
+ score: scoreText(terms, `${file.path} ${file.kind} ${file.language} ${file.extraction} ${file.signals.join(" ")} ${file.concepts.join(" ")} ${file.top_symbols.join(" ")}`, [file.path, file.language, ...file.concepts]),
4674
+ }))
4675
+ .filter((entry) => entry.score > 0 && !graphPaths.has(entry.file.path))
4676
+ .sort((a, b) => b.score - a.score || a.file.path.localeCompare(b.file.path))
4677
+ .slice(0, limit)
4678
+ .map((entry) => entry.file)
4679
+ : [];
4680
+ const structuralSymbols = structuralIndex
4681
+ ? structuralIndex.symbols
4682
+ .map((symbol) => ({
4683
+ symbol,
4684
+ score: scoreText(terms, `${symbol.name} ${symbol.kind} ${symbol.path} ${symbol.language} ${symbol.parser}`, [symbol.name, symbol.path]),
4685
+ }))
4686
+ .filter((entry) => entry.score > 0 && !graphSymbolIds.has(entry.symbol.id))
4687
+ .sort((a, b) => b.score - a.score || a.symbol.path.localeCompare(b.symbol.path) || a.symbol.line - b.symbol.line)
4688
+ .slice(0, limit)
4689
+ .map((entry) => entry.symbol)
4690
+ : [];
4691
+ const structuralRelevantPaths = new Set([
4692
+ ...structuralFiles.map((file) => file.path),
4693
+ ...structuralSymbols.map((symbol) => symbol.path),
4694
+ ]);
4695
+ const structuralEdges = structuralIndex
4696
+ ? structuralIndex.edges
4697
+ .map((edge) => ({
4698
+ edge,
4699
+ score: scoreText(terms, `${edge.relation} ${edge.source} ${edge.target} ${edge.source_file}`, [edge.source_file, edge.target]),
4700
+ }))
4701
+ .filter((entry) => entry.score > 0 || structuralRelevantPaths.has(entry.edge.source_file))
4702
+ .sort((a, b) => b.score - a.score || a.edge.source_file.localeCompare(b.edge.source_file) || a.edge.target.localeCompare(b.edge.target))
4703
+ .slice(0, limit)
4704
+ .map((entry) => entry.edge)
4705
+ : [];
3811
4706
  const lines = [
3812
4707
  "# Kage Code Graph Context",
3813
4708
  "",
@@ -3818,6 +4713,14 @@ function queryCodeGraph(projectDir, query, limit = 10, graph) {
3818
4713
  ...symbols.map((symbol, index) => `${index + 1}. [symbol] ${symbol.kind} ${symbol.name} in ${symbol.path}:${symbol.line} (${symbol.language}, ${symbol.parser})`),
3819
4714
  ...tests.map((test, index) => `${index + 1}. [test] ${test.title} in ${test.test_path}:${test.line}${test.covers_symbol ? ` covers ${test.covers_symbol}` : ""}`),
3820
4715
  ...files.slice(0, 5).map((file, index) => `${index + 1}. [file] ${file.path} (${file.kind}, ${file.language}, ${file.parser})`),
4716
+ structuralFiles.length || structuralSymbols.length || structuralEdges.length ? "" : "",
4717
+ structuralFiles.length || structuralSymbols.length || structuralEdges.length ? "## Structural Index" : "",
4718
+ ...structuralSymbols.map((symbol, index) => `${index + 1}. [structural symbol] ${symbol.kind} ${symbol.name} in ${symbol.path}:${symbol.line} (${symbol.language}, ${symbol.parser})`),
4719
+ ...structuralFiles.slice(0, 5).map((file, index) => `${index + 1}. [structural file] ${file.path} (${file.kind}, ${file.language}, ${file.extraction})`),
4720
+ ...structuralEdges
4721
+ .filter((edge) => edge.relation === "imports")
4722
+ .slice(0, 5)
4723
+ .map((edge, index) => `${index + 1}. [structural import] ${edge.source_file}${edge.source_location ? `:${edge.source_location.replace(/^L/, "")}` : ""} -> ${edge.target} (${edge.confidence})`),
3821
4724
  imports.length ? "" : "",
3822
4725
  imports.length ? "## Imports" : "",
3823
4726
  ...imports.map(({ item }, index) => `${index + 1}. ${item.from_path}:${item.line} ${item.kind} ${item.specifier}${item.to_path ? ` -> ${item.to_path}` : ""}`),
@@ -3825,7 +4728,19 @@ function queryCodeGraph(projectDir, query, limit = 10, graph) {
3825
4728
  calls.length ? "## Calls" : "",
3826
4729
  ...calls.map((call, index) => `${index + 1}. ${call.from_symbol ? symbolNameById.get(call.from_symbol) ?? call.from_symbol : call.path} calls ${symbolNameById.get(call.to_symbol) ?? call.to_symbol} at ${call.path}:${call.line}`),
3827
4730
  ];
3828
- return { query, context_block: lines.join("\n"), files, symbols, imports: imports.map((entry) => entry.item), calls, routes, tests };
4731
+ return {
4732
+ query,
4733
+ context_block: lines.join("\n"),
4734
+ files,
4735
+ symbols,
4736
+ imports: imports.map((entry) => entry.item),
4737
+ calls,
4738
+ routes,
4739
+ tests,
4740
+ structural_files: structuralFiles,
4741
+ structural_symbols: structuralSymbols,
4742
+ structural_edges: structuralEdges,
4743
+ };
3829
4744
  }
3830
4745
  function queryGraph(projectDir, query, limit = 10, graph) {
3831
4746
  graph = graph ?? readCurrentGraphs(projectDir)?.knowledgeGraph ?? buildKnowledgeGraph(projectDir);
@@ -3908,6 +4823,7 @@ function kageMetrics(projectDir) {
3908
4823
  const policyPath = (0, node_path_1.join)(projectDir, "AGENTS.md");
3909
4824
  const policyInstalled = (0, node_fs_1.existsSync)(policyPath) && (0, node_fs_1.readFileSync)(policyPath, "utf8").includes(AGENTS_POLICY_MARKER);
3910
4825
  const indexManifest = readCodeIndexManifest(projectDir);
4826
+ const structuralManifest = readStructuralIndexManifest(projectDir);
3911
4827
  const sourceFiles = codeGraph.files.filter((file) => file.kind === "source" || file.kind === "test");
3912
4828
  const indexedSourceFiles = sourceFiles.filter((file) => file.parser !== "metadata");
3913
4829
  const coverage = indexManifest.coverage.indexable_files > 0 ? indexManifest.coverage.coverage_percent : percent(indexedSourceFiles.length, sourceFiles.length);
@@ -3958,6 +4874,17 @@ function kageMetrics(projectDir) {
3958
4874
  cache_hits: indexManifest.cache.hits,
3959
4875
  cache_misses: indexManifest.cache.misses,
3960
4876
  },
4877
+ structural_index: {
4878
+ files: structuralManifest.files.indexed,
4879
+ symbols: structuralManifest.symbols,
4880
+ edges: structuralManifest.edges,
4881
+ metadata_only_files: structuralManifest.files.metadata_only,
4882
+ ignored_files: structuralManifest.files.ignored,
4883
+ languages: structuralManifest.languages,
4884
+ worker_count: structuralManifest.worker_count,
4885
+ cache_hits: structuralManifest.cache.hits,
4886
+ cache_misses: structuralManifest.cache.misses,
4887
+ },
3961
4888
  memory_graph: {
3962
4889
  approved_packets: approvedPackets,
3963
4890
  pending_packets: pendingPackets,
@@ -4011,7 +4938,9 @@ function auditProject(projectDir) {
4011
4938
  const preciseFiles = codeGraph.files.filter((file) => preciseParsers.includes(file.parser)).length;
4012
4939
  const astFiles = codeGraph.files.filter((file) => astParsers.includes(file.parser)).length;
4013
4940
  const fallbackFiles = codeGraph.files.filter((file) => file.parser === "generic-static" || file.parser === "metadata").length;
4014
- const memoryCodeEdges = knowledgeGraph.edges.filter((edge) => ["explains_symbol", "informs_symbol", "fixes_symbol", "applies_to_route", "verified_by_test"].includes(edge.relation)).length;
4941
+ const preciseMemoryCodeEdges = knowledgeGraph.edges.filter((edge) => ["explains_symbol", "informs_symbol", "fixes_symbol", "applies_to_route", "verified_by_test"].includes(edge.relation)).length;
4942
+ const pathMemoryCodeEdges = knowledgeGraph.edges.filter((edge) => edge.relation === "affects_path").length;
4943
+ const memoryCodeEdges = preciseMemoryCodeEdges + pathMemoryCodeEdges;
4015
4944
  const stalePackets = quality.totals.stale;
4016
4945
  const duplicateCandidatesTotal = quality.totals.duplicate;
4017
4946
  const structuredCoverage = percent(structuredPackets.length, approved.length);
@@ -4033,8 +4962,11 @@ function auditProject(projectDir) {
4033
4962
  if (preciseFiles < indexableFiles) {
4034
4963
  recommendations.push("Add or extend SCIP/LSIF/LSP index artifacts in CI for remaining source files; keep AST/static extraction as fallback.");
4035
4964
  }
4036
- if (!memoryCodeEdges && approved.length && codeGraph.symbols.length) {
4037
- recommendations.push("Link memory packets to symbols, routes, and tests with code_explanation, bug_fix, decision, and verification context.");
4965
+ if (!memoryCodeEdges && approved.length && codeGraph.files.length) {
4966
+ recommendations.push("Ground memory packets to repo paths, symbols, routes, or tests so recall and the viewer can bridge memory to code.");
4967
+ }
4968
+ else if (!preciseMemoryCodeEdges && pathMemoryCodeEdges && codeGraph.symbols.length) {
4969
+ recommendations.push("Path-level memory links exist; add symbol, route, or test names to high-value memories when you need precise code evidence.");
4038
4970
  }
4039
4971
  if (!validation.ok) {
4040
4972
  recommendations.push("Fix validation errors before relying on Kage in PR or agent-start workflows.");
@@ -4074,6 +5006,8 @@ function auditProject(projectDir) {
4074
5006
  },
4075
5007
  graph_links: {
4076
5008
  memory_code_edges: memoryCodeEdges,
5009
+ precise_memory_code_edges: preciseMemoryCodeEdges,
5010
+ path_memory_code_edges: pathMemoryCodeEdges,
4077
5011
  evidence_coverage_percent: percent(knowledgeGraph.edges.filter((edge) => edge.evidence.length > 0).length, knowledgeGraph.edges.length),
4078
5012
  },
4079
5013
  },
@@ -4442,6 +5376,7 @@ function kageMetricsShallow(projectDir, inputs = {}) {
4442
5376
  const knowledgeGraph = inputs.knowledgeGraph ?? buildKnowledgeGraph(projectDir, codeGraph);
4443
5377
  const validation = inputs.validation ?? validateProject(projectDir);
4444
5378
  const indexManifest = readCodeIndexManifest(projectDir);
5379
+ const structuralManifest = readStructuralIndexManifest(projectDir);
4445
5380
  const sourceFiles = codeGraph.files.filter((file) => file.kind === "source" || file.kind === "test");
4446
5381
  const indexedSourceFiles = sourceFiles.filter((file) => file.parser !== "metadata");
4447
5382
  const coverage = indexManifest.coverage.indexable_files > 0 ? indexManifest.coverage.coverage_percent : percent(indexedSourceFiles.length, sourceFiles.length);
@@ -4474,6 +5409,17 @@ function kageMetricsShallow(projectDir, inputs = {}) {
4474
5409
  cache_hits: indexManifest.cache.hits,
4475
5410
  cache_misses: indexManifest.cache.misses,
4476
5411
  },
5412
+ structural_index: {
5413
+ files: structuralManifest.files.indexed,
5414
+ symbols: structuralManifest.symbols,
5415
+ edges: structuralManifest.edges,
5416
+ metadata_only_files: structuralManifest.files.metadata_only,
5417
+ ignored_files: structuralManifest.files.ignored,
5418
+ languages: structuralManifest.languages,
5419
+ worker_count: structuralManifest.worker_count,
5420
+ cache_hits: structuralManifest.cache.hits,
5421
+ cache_misses: structuralManifest.cache.misses,
5422
+ },
4477
5423
  memory_graph: {
4478
5424
  approved_packets: loadPacketsFromDir(packetsDir(projectDir)).length,
4479
5425
  pending_packets: loadPacketsFromDir(pendingDir(projectDir)).length,
@@ -5645,7 +6591,7 @@ function prCheck(projectDir) {
5645
6591
  const rawStatus = readGit(projectDir, ["status", "--porcelain", "-uall"]) ?? "";
5646
6592
  const validation = validateProject(projectDir);
5647
6593
  const tree = gitTree(projectDir);
5648
- const codeInputHash = codeGraphInputHash(projectDir);
6594
+ const codeInputHash = currentCodeGraphInputHash(projectDir);
5649
6595
  const memoryInputHash = knowledgeGraphInputHash(projectDir, codeInputHash);
5650
6596
  const stalePackets = loadPacketsFromDir(packetsDir(projectDir))
5651
6597
  .map((packet) => ({ packet, reasons: staleMemoryReasons(projectDir, packet) }))