@vyuhlabs/dxkit 2.9.3 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/CHANGELOG.md +170 -0
  2. package/README.md +9 -0
  3. package/dist/allowlist/annotate.d.ts +71 -0
  4. package/dist/allowlist/annotate.d.ts.map +1 -0
  5. package/dist/allowlist/annotate.js +105 -0
  6. package/dist/allowlist/annotate.js.map +1 -0
  7. package/dist/allowlist/cli.d.ts +6 -0
  8. package/dist/allowlist/cli.d.ts.map +1 -1
  9. package/dist/allowlist/cli.js +70 -37
  10. package/dist/allowlist/cli.js.map +1 -1
  11. package/dist/analyzers/dashboard/index.d.ts.map +1 -1
  12. package/dist/analyzers/dashboard/index.js +6 -1
  13. package/dist/analyzers/dashboard/index.js.map +1 -1
  14. package/dist/analyzers/developer/gather.d.ts +16 -0
  15. package/dist/analyzers/developer/gather.d.ts.map +1 -1
  16. package/dist/analyzers/developer/gather.js +2 -0
  17. package/dist/analyzers/developer/gather.js.map +1 -1
  18. package/dist/analyzers/developer/ownership.d.ts +86 -0
  19. package/dist/analyzers/developer/ownership.d.ts.map +1 -0
  20. package/dist/analyzers/developer/ownership.js +180 -0
  21. package/dist/analyzers/developer/ownership.js.map +1 -0
  22. package/dist/analyzers/health.d.ts.map +1 -1
  23. package/dist/analyzers/health.js +17 -2
  24. package/dist/analyzers/health.js.map +1 -1
  25. package/dist/analyzers/quality/detailed.d.ts +5 -1
  26. package/dist/analyzers/quality/detailed.d.ts.map +1 -1
  27. package/dist/analyzers/quality/detailed.js +30 -29
  28. package/dist/analyzers/quality/detailed.js.map +1 -1
  29. package/dist/analyzers/security/actions.d.ts.map +1 -1
  30. package/dist/analyzers/security/actions.js +13 -0
  31. package/dist/analyzers/security/actions.js.map +1 -1
  32. package/dist/analyzers/security/aggregator.d.ts +18 -0
  33. package/dist/analyzers/security/aggregator.d.ts.map +1 -1
  34. package/dist/analyzers/security/aggregator.js +28 -0
  35. package/dist/analyzers/security/aggregator.js.map +1 -1
  36. package/dist/analyzers/security/detailed.d.ts +7 -1
  37. package/dist/analyzers/security/detailed.d.ts.map +1 -1
  38. package/dist/analyzers/security/detailed.js +31 -15
  39. package/dist/analyzers/security/detailed.js.map +1 -1
  40. package/dist/analyzers/security/gather.d.ts.map +1 -1
  41. package/dist/analyzers/security/gather.js +6 -0
  42. package/dist/analyzers/security/gather.js.map +1 -1
  43. package/dist/analyzers/security/index.d.ts.map +1 -1
  44. package/dist/analyzers/security/index.js +81 -2
  45. package/dist/analyzers/security/index.js.map +1 -1
  46. package/dist/analyzers/security/scanner-drift.d.ts +21 -0
  47. package/dist/analyzers/security/scanner-drift.d.ts.map +1 -0
  48. package/dist/analyzers/security/scanner-drift.js +113 -0
  49. package/dist/analyzers/security/scanner-drift.js.map +1 -0
  50. package/dist/analyzers/security/shallow.d.ts.map +1 -1
  51. package/dist/analyzers/security/shallow.js +24 -2
  52. package/dist/analyzers/security/shallow.js.map +1 -1
  53. package/dist/analyzers/security/types.d.ts +38 -0
  54. package/dist/analyzers/security/types.d.ts.map +1 -1
  55. package/dist/analyzers/tests/detailed.d.ts +5 -1
  56. package/dist/analyzers/tests/detailed.d.ts.map +1 -1
  57. package/dist/analyzers/tests/detailed.js +27 -20
  58. package/dist/analyzers/tests/detailed.js.map +1 -1
  59. package/dist/analyzers/tools/graphify.d.ts +11 -0
  60. package/dist/analyzers/tools/graphify.d.ts.map +1 -1
  61. package/dist/analyzers/tools/graphify.js +429 -413
  62. package/dist/analyzers/tools/graphify.js.map +1 -1
  63. package/dist/analyzers/tools/grep-secrets.d.ts.map +1 -1
  64. package/dist/analyzers/tools/grep-secrets.js +9 -0
  65. package/dist/analyzers/tools/grep-secrets.js.map +1 -1
  66. package/dist/analyzers/tools/osv-scanner-fix.d.ts.map +1 -1
  67. package/dist/analyzers/tools/osv-scanner-fix.js +12 -1
  68. package/dist/analyzers/tools/osv-scanner-fix.js.map +1 -1
  69. package/dist/analyzers/tools/tool-registry.d.ts.map +1 -1
  70. package/dist/analyzers/tools/tool-registry.js +78 -43
  71. package/dist/analyzers/tools/tool-registry.js.map +1 -1
  72. package/dist/analyzers/tools/walk-source-files.d.ts +10 -0
  73. package/dist/analyzers/tools/walk-source-files.d.ts.map +1 -1
  74. package/dist/analyzers/tools/walk-source-files.js +14 -0
  75. package/dist/analyzers/tools/walk-source-files.js.map +1 -1
  76. package/dist/analyzers/types.d.ts +9 -0
  77. package/dist/analyzers/types.d.ts.map +1 -1
  78. package/dist/attribution/attribute.d.ts +57 -0
  79. package/dist/attribution/attribute.d.ts.map +1 -0
  80. package/dist/attribution/attribute.js +149 -0
  81. package/dist/attribution/attribute.js.map +1 -0
  82. package/dist/baseline/entry-to-located.d.ts +12 -5
  83. package/dist/baseline/entry-to-located.d.ts.map +1 -1
  84. package/dist/baseline/entry-to-located.js +21 -7
  85. package/dist/baseline/entry-to-located.js.map +1 -1
  86. package/dist/baseline/git-aware-match.d.ts +7 -5
  87. package/dist/baseline/git-aware-match.d.ts.map +1 -1
  88. package/dist/baseline/git-aware-match.js +78 -5
  89. package/dist/baseline/git-aware-match.js.map +1 -1
  90. package/dist/cli.d.ts.map +1 -1
  91. package/dist/cli.js +53 -5
  92. package/dist/cli.js.map +1 -1
  93. package/dist/explore/context-hook.d.ts +49 -29
  94. package/dist/explore/context-hook.d.ts.map +1 -1
  95. package/dist/explore/context-hook.js +304 -29
  96. package/dist/explore/context-hook.js.map +1 -1
  97. package/dist/generator.d.ts.map +1 -1
  98. package/dist/generator.js +13 -7
  99. package/dist/generator.js.map +1 -1
  100. package/dist/ingest/snyk-policy.d.ts +22 -1
  101. package/dist/ingest/snyk-policy.d.ts.map +1 -1
  102. package/dist/ingest/snyk-policy.js +75 -18
  103. package/dist/ingest/snyk-policy.js.map +1 -1
  104. package/dist/languages/index.d.ts +28 -5
  105. package/dist/languages/index.d.ts.map +1 -1
  106. package/dist/languages/index.js +38 -7
  107. package/dist/languages/index.js.map +1 -1
  108. package/dist/languages/typescript.d.ts.map +1 -1
  109. package/dist/languages/typescript.js +19 -0
  110. package/dist/languages/typescript.js.map +1 -1
  111. package/dist/reviewers-cli.d.ts +57 -0
  112. package/dist/reviewers-cli.d.ts.map +1 -0
  113. package/dist/reviewers-cli.js +263 -0
  114. package/dist/reviewers-cli.js.map +1 -0
  115. package/dist/scoring/dimensions/security.d.ts +17 -0
  116. package/dist/scoring/dimensions/security.d.ts.map +1 -1
  117. package/dist/scoring/dimensions/security.js +12 -0
  118. package/dist/scoring/dimensions/security.js.map +1 -1
  119. package/package.json +1 -1
  120. package/templates/.claude/skills/dxkit-action/SKILL.md +13 -2
  121. package/templates/.claude/skills/dxkit-allowlist/SKILL.md +9 -0
  122. package/templates/.claude/skills/dxkit-onboard/SKILL.md +2 -2
  123. package/templates/.claude/skills/dxkit-pr/SKILL.md +22 -1
@@ -34,6 +34,7 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.graphifyProvider = void 0;
37
+ exports.buildGraphifyScript = buildGraphifyScript;
37
38
  exports.gatherGraphifyResult = gatherGraphifyResult;
38
39
  exports.gatherGraphifyGraph = gatherGraphifyGraph;
39
40
  exports.buildGraphifyEnvelope = buildGraphifyEnvelope;
@@ -62,17 +63,23 @@ const tool_registry_1 = require("./tool-registry");
62
63
  const exclusions_1 = require("./exclusions");
63
64
  const paths_1 = require("./paths");
64
65
  const types_1 = require("../../explore/types");
65
- /** Build the graphify Python script with cwd-specific exclusions baked in. */
66
+ /**
67
+ * Build the graphify Python script with cwd-specific exclusions baked in.
68
+ *
69
+ * Exported so the structural contract of the generated script — the
70
+ * `if __name__ == '__main__'` guard that keeps ProcessPoolExecutor workers
71
+ * from re-running extraction under spawn/forkserver (Python 3.14's Linux
72
+ * default), and the public `extract(cache_root=...)` cache redirect that
73
+ * replaced the fragile `cache_dir` monkeypatch — is unit-testable without a
74
+ * Python interpreter or graphify installed (mirrors `buildGraphifyEnvelope`).
75
+ */
66
76
  function buildGraphifyScript(cwd) {
67
77
  const { dirsSet, pathsList, fileGlobsList } = (0, exclusions_1.getPythonExcludeFilter)(cwd);
68
78
  return `# Exclusion set derived from src/analyzers/tools/exclusions.ts
69
- import json, sys, os, tempfile
79
+ import json, sys, os
70
80
  from pathlib import Path
71
81
  from collections import Counter
72
82
 
73
- # Redirect graphify cache to /tmp so we don't pollute the target repo
74
- _cache_dir = Path(tempfile.mkdtemp(prefix='dxkit-graphify-'))
75
-
76
83
  try:
77
84
  from graphify.extract import extract, collect_files
78
85
  from graphify.build import build
@@ -82,17 +89,6 @@ except ImportError:
82
89
  print(json.dumps({"error": "graphify not installed"}))
83
90
  sys.exit(0)
84
91
 
85
- # Redirect graphify's on-disk cache BEFORE any graphify function runs.
86
- # collect_files() eagerly resolves cache_dir() during enumeration, so
87
- # the patch has to land before the first graphify call — not after.
88
- # Pre-patch, a 'graphify-out/cache/' directory was created in the
89
- # customer's repo every time the analyzer touched a project.
90
- import graphify.cache as _gc
91
- _gc.cache_dir = lambda root=None: _cache_dir / "cache"
92
- (_cache_dir / "cache").mkdir(parents=True, exist_ok=True)
93
-
94
- target = Path(sys.argv[1])
95
-
96
92
  # Three-axis exclusion. EXCLUDE_DIRS is basename-only (any path
97
93
  # segment matching skips the file). EXCLUDE_PATHS holds multi-segment
98
94
  # relative paths from .dxkit-ignore (e.g. 'app/modules/plugins/VendorPlugin')
@@ -274,407 +270,418 @@ def _strip_paren_suffix(label):
274
270
  s = s.rsplit('.', 1)[1]
275
271
  return s
276
272
 
277
- all_files = collect_files(target)
278
- files = [f for f in all_files if not _is_excluded(f)]
279
- if not files:
280
- print(json.dumps({"error": "no files found"}))
281
- sys.exit(0)
282
-
283
- # Suppress progress output by redirecting stdout during extraction
284
- import io
285
- _real_stdout = sys.stdout
286
- sys.stdout = io.StringIO()
287
- result = extract(files)
288
- sys.stdout = _real_stdout
289
- G = build([result], directed=True)
290
- communities = cluster(G)
291
-
292
- # Functions vs modules
293
- nodes = list(G.nodes(data=True))
294
- functions = [(n, d) for n, d in nodes if "()" in d.get("label", "")]
295
- modules = [(n, d) for n, d in nodes if "()" not in d.get("label", "")]
296
-
297
- # Functions per file
298
- file_funcs = Counter()
299
- for n, d in functions:
300
- sf = d.get("source_file", "")
301
- file_funcs[sf] += 1
302
-
303
- max_file = file_funcs.most_common(1)[0] if file_funcs else ("", 0)
304
-
305
- # God nodes: graphifyy@0.5.0 renamed the result key "edges" → "degree".
306
- gods = god_nodes(G, top_n=50)
307
- god_count = sum(1 for g in gods if g["degree"] > 15)
308
-
309
- # Cohesion
310
- scores = score_all(G, communities) if communities else {}
311
- avg_cohesion = sum(scores.values()) / len(scores) if scores else 0.0
312
-
313
- # Orphan modules (no inbound imports)
314
- import_targets = set()
315
- for u, v, data in G.edges(data=True):
316
- if data.get("relation") == "imports_from":
317
- import_targets.add(v)
318
- module_ids = set(n for n, d in modules)
319
- orphans = module_ids - import_targets
320
-
321
- # Dead imports (imported but never called)
322
- call_targets = set()
323
- for u, v, data in G.edges(data=True):
324
- if data.get("relation") == "calls":
325
- call_targets.add(v)
326
- dead = import_targets - call_targets - module_ids
327
-
328
- # Commented code ratio: source files with 0 function/class AST nodes
329
- source_files_set = set()
330
- files_with_nodes = set()
331
- for n, d in nodes:
332
- sf = d.get("source_file", "")
333
- if sf:
334
- source_files_set.add(sf)
335
- if "()" in d.get("label", "") or any(
336
- data.get("relation") == "method"
337
- for _, _, data in G.edges(n, data=True)
338
- ):
339
- files_with_nodes.add(sf)
340
-
341
- total_src = len(source_files_set)
342
- empty_files = total_src - len(files_with_nodes)
343
- commented_ratio = empty_files / total_src if total_src > 0 else 0.0
344
-
345
-
346
- # ── Build the full graph artifact ────────────────────────────────────────────
347
- # 2.7 Sprint 1: emit nodes / edges / communities / symbolIndex alongside
348
- # the aggregate metrics. Consumers (explore CLI, dashboard viz, future
349
- # 2.8 context CLI + reachability) read this via src/explore/load.ts.
350
- # Schema contract documented in tmp/2.7-graph-json-schema.md.
351
-
352
- # Determine class membership: a module-shaped node is a CLASS if it has
353
- # outbound 'method' edges to other nodes (it's the owner). A function-
354
- # shaped node ("()" in label) is a METHOD if it has inbound 'method'
355
- # edges from a class node; otherwise it's a free FUNCTION.
356
- _class_owners = set()
357
- _method_members = set()
358
- for u, v, data in G.edges(data=True):
359
- if data.get("relation") == "method":
360
- _class_owners.add(u)
361
- _method_members.add(v)
362
-
363
- def _node_kind(nid, attrs):
364
- label = attrs.get('label', '')
365
- is_callable = '()' in label
366
- if is_callable:
367
- return 'method' if nid in _method_members else 'function'
368
- return 'class' if nid in _class_owners else 'module'
369
-
370
- # Make node sourceFile paths project-relative (graphify emits absolute
371
- # paths derived from \`target = sys.argv[1]\`). Mirrors the existing
372
- # maxFunctionsFilePath path-normalization at the TS layer.
373
- def _rel(p):
374
- if not p:
375
- return ''
376
- s = str(p).replace(os.sep, '/')
377
- t = str(target).replace(os.sep, '/').rstrip('/')
378
- if s.startswith(t + '/'):
379
- return s[len(t) + 1:]
380
- if s == t:
381
- return ''
382
- return s
383
-
384
- # Assign stable in-run ids: n0, n1, n2, ... in extraction order. The
385
- # graphify-internal id strings (long underscored slugs) work but bloat
386
- # the JSON by ~20 bytes per node; the n<idx> shortening saves ~50KB on
387
- # a 13k-node repo. IDs are NOT stable across runs (per schema doc).
388
- _id_remap = {}
389
- graph_nodes = []
390
- for idx, (nid, attrs) in enumerate(nodes):
391
- short_id = f'n{idx}'
392
- _id_remap[nid] = short_id
393
- line_no = _parse_line_no(attrs)
394
- rel_source = _rel(attrs.get('source_file', ''))
395
- label = attrs.get('label', '')
396
- name = _strip_paren_suffix(label)
397
- kind = _node_kind(nid, attrs)
398
- node_obj = {
399
- 'id': short_id,
400
- 'kind': kind,
401
- 'label': label,
402
- 'sourceFile': rel_source,
403
- }
404
- if line_no:
405
- node_obj['line'] = line_no
406
- # Export detection only meaningful for symbol-bearing kinds
407
- # (functions, classes, methods). Module-level "is this file
408
- # exported?" isn't a useful question — exclude.
409
- if kind in ('function', 'class', 'method'):
410
- # Resolve to absolute path for the file-line cache (we read
411
- # the raw source content; the cache key is the actual path
412
- # on disk, not the project-relative form).
413
- abs_source = attrs.get('source_file', '')
414
- exported = _detect_exported(abs_source, line_no, name)
415
- if exported is not None:
416
- node_obj['exported'] = exported
417
- graph_nodes.append(node_obj)
418
-
419
- # Edges remapped to short ids. Drop self-loops and edges where either
420
- # endpoint was filtered out (defensive — graphify shouldn't produce them
421
- # but be tolerant). Graphify emits both 'imports' (broad form: \`import X\`)
422
- # and 'imports_from' (\`from X import Y\` / \`import {Y} from X\`); both
423
- # carry the same semantic for our schema ("A imports from B"). Merge
424
- # both into the canonical 'imports_from' edge relation. The 'contains'
425
- # and 'inherits' relations graphify also produces are intentionally
426
- # dropped — 'contains' duplicates the file/symbol-membership info
427
- # already encoded in nodes' sourceFile field, and 'inherits' is
428
- # class-inheritance which isn't yet a first-class schema relation.
429
- graph_edges = []
430
- for u, v, data in G.edges(data=True):
431
- if u not in _id_remap or v not in _id_remap:
432
- continue
433
- graphify_relation = data.get('relation', '')
434
- if graphify_relation == 'calls':
435
- relation = 'calls'
436
- elif graphify_relation in ('imports', 'imports_from'):
437
- relation = 'imports_from'
438
- elif graphify_relation == 'method':
439
- relation = 'method'
440
- else:
441
- continue
442
- edge_obj = {
443
- 'from': _id_remap[u],
444
- 'to': _id_remap[v],
445
- 'relation': relation,
273
+ if __name__ == '__main__':
274
+ # ProcessPoolExecutor workers re-import this module under spawn/
275
+ # forkserver (the Python 3.14 default on Linux); the __main__ guard
276
+ # keeps extraction from re-running per worker. graphify's own
277
+ # _extract_parallel requires this guard (it warns BrokenProcessPool
278
+ # and dies without it). See graphify/extract.py:_extract_parallel.
279
+ target = Path(sys.argv[1])
280
+ # graphify's on-disk cache is redirected here (the public cache_root
281
+ # param passed to extract() below) so it never lands in the target
282
+ # repo. The TS caller owns this dir's lifecycle — it lives under the
283
+ # ephemeral scriptDir and is removed after this process fully exits,
284
+ # which is the only point that survives graphify's atexit stat-index
285
+ # flush (graphify/cache.py registers _flush_stat_index at exit, so a
286
+ # Python-side rmtree here would be undone by that post-exit write).
287
+ _cache_dir = Path(sys.argv[2])
288
+ all_files = collect_files(target)
289
+ files = [f for f in all_files if not _is_excluded(f)]
290
+ if not files:
291
+ print(json.dumps({"error": "no files found"}))
292
+ sys.exit(0)
293
+
294
+ # Suppress progress output by redirecting stdout during extraction
295
+ import io
296
+ _real_stdout = sys.stdout
297
+ sys.stdout = io.StringIO()
298
+ result = extract(files, cache_root=_cache_dir)
299
+ sys.stdout = _real_stdout
300
+ G = build([result], directed=True)
301
+ communities = cluster(G)
302
+
303
+ # Functions vs modules
304
+ nodes = list(G.nodes(data=True))
305
+ functions = [(n, d) for n, d in nodes if "()" in d.get("label", "")]
306
+ modules = [(n, d) for n, d in nodes if "()" not in d.get("label", "")]
307
+
308
+ # Functions per file
309
+ file_funcs = Counter()
310
+ for n, d in functions:
311
+ sf = d.get("source_file", "")
312
+ file_funcs[sf] += 1
313
+
314
+ max_file = file_funcs.most_common(1)[0] if file_funcs else ("", 0)
315
+
316
+ # God nodes: graphifyy@0.5.0 renamed the result key "edges" → "degree".
317
+ gods = god_nodes(G, top_n=50)
318
+ god_count = sum(1 for g in gods if g["degree"] > 15)
319
+
320
+ # Cohesion
321
+ scores = score_all(G, communities) if communities else {}
322
+ avg_cohesion = sum(scores.values()) / len(scores) if scores else 0.0
323
+
324
+ # Orphan modules (no inbound imports)
325
+ import_targets = set()
326
+ for u, v, data in G.edges(data=True):
327
+ if data.get("relation") == "imports_from":
328
+ import_targets.add(v)
329
+ module_ids = set(n for n, d in modules)
330
+ orphans = module_ids - import_targets
331
+
332
+ # Dead imports (imported but never called)
333
+ call_targets = set()
334
+ for u, v, data in G.edges(data=True):
335
+ if data.get("relation") == "calls":
336
+ call_targets.add(v)
337
+ dead = import_targets - call_targets - module_ids
338
+
339
+ # Commented code ratio: source files with 0 function/class AST nodes
340
+ source_files_set = set()
341
+ files_with_nodes = set()
342
+ for n, d in nodes:
343
+ sf = d.get("source_file", "")
344
+ if sf:
345
+ source_files_set.add(sf)
346
+ if "()" in d.get("label", "") or any(
347
+ data.get("relation") == "method"
348
+ for _, _, data in G.edges(n, data=True)
349
+ ):
350
+ files_with_nodes.add(sf)
351
+
352
+ total_src = len(source_files_set)
353
+ empty_files = total_src - len(files_with_nodes)
354
+ commented_ratio = empty_files / total_src if total_src > 0 else 0.0
355
+
356
+
357
+ # ── Build the full graph artifact ────────────────────────────────────────────
358
+ # 2.7 Sprint 1: emit nodes / edges / communities / symbolIndex alongside
359
+ # the aggregate metrics. Consumers (explore CLI, dashboard viz, future
360
+ # 2.8 context CLI + reachability) read this via src/explore/load.ts.
361
+ # Schema contract documented in tmp/2.7-graph-json-schema.md.
362
+
363
+ # Determine class membership: a module-shaped node is a CLASS if it has
364
+ # outbound 'method' edges to other nodes (it's the owner). A function-
365
+ # shaped node ("()" in label) is a METHOD if it has inbound 'method'
366
+ # edges from a class node; otherwise it's a free FUNCTION.
367
+ _class_owners = set()
368
+ _method_members = set()
369
+ for u, v, data in G.edges(data=True):
370
+ if data.get("relation") == "method":
371
+ _class_owners.add(u)
372
+ _method_members.add(v)
373
+
374
+ def _node_kind(nid, attrs):
375
+ label = attrs.get('label', '')
376
+ is_callable = '()' in label
377
+ if is_callable:
378
+ return 'method' if nid in _method_members else 'function'
379
+ return 'class' if nid in _class_owners else 'module'
380
+
381
+ # Make node sourceFile paths project-relative (graphify emits absolute
382
+ # paths derived from \`target = sys.argv[1]\`). Mirrors the existing
383
+ # maxFunctionsFilePath path-normalization at the TS layer.
384
+ def _rel(p):
385
+ if not p:
386
+ return ''
387
+ s = str(p).replace(os.sep, '/')
388
+ t = str(target).replace(os.sep, '/').rstrip('/')
389
+ if s.startswith(t + '/'):
390
+ return s[len(t) + 1:]
391
+ if s == t:
392
+ return ''
393
+ return s
394
+
395
+ # Assign stable in-run ids: n0, n1, n2, ... in extraction order. The
396
+ # graphify-internal id strings (long underscored slugs) work but bloat
397
+ # the JSON by ~20 bytes per node; the n<idx> shortening saves ~50KB on
398
+ # a 13k-node repo. IDs are NOT stable across runs (per schema doc).
399
+ _id_remap = {}
400
+ graph_nodes = []
401
+ for idx, (nid, attrs) in enumerate(nodes):
402
+ short_id = f'n{idx}'
403
+ _id_remap[nid] = short_id
404
+ line_no = _parse_line_no(attrs)
405
+ rel_source = _rel(attrs.get('source_file', ''))
406
+ label = attrs.get('label', '')
407
+ name = _strip_paren_suffix(label)
408
+ kind = _node_kind(nid, attrs)
409
+ node_obj = {
410
+ 'id': short_id,
411
+ 'kind': kind,
412
+ 'label': label,
413
+ 'sourceFile': rel_source,
414
+ }
415
+ if line_no:
416
+ node_obj['line'] = line_no
417
+ # Export detection only meaningful for symbol-bearing kinds
418
+ # (functions, classes, methods). Module-level "is this file
419
+ # exported?" isn't a useful question — exclude.
420
+ if kind in ('function', 'class', 'method'):
421
+ # Resolve to absolute path for the file-line cache (we read
422
+ # the raw source content; the cache key is the actual path
423
+ # on disk, not the project-relative form).
424
+ abs_source = attrs.get('source_file', '')
425
+ exported = _detect_exported(abs_source, line_no, name)
426
+ if exported is not None:
427
+ node_obj['exported'] = exported
428
+ graph_nodes.append(node_obj)
429
+
430
+ # Edges remapped to short ids. Drop self-loops and edges where either
431
+ # endpoint was filtered out (defensive — graphify shouldn't produce them
432
+ # but be tolerant). Graphify emits both 'imports' (broad form: \`import X\`)
433
+ # and 'imports_from' (\`from X import Y\` / \`import {Y} from X\`); both
434
+ # carry the same semantic for our schema ("A imports from B"). Merge
435
+ # both into the canonical 'imports_from' edge relation. The 'contains'
436
+ # and 'inherits' relations graphify also produces are intentionally
437
+ # dropped — 'contains' duplicates the file/symbol-membership info
438
+ # already encoded in nodes' sourceFile field, and 'inherits' is
439
+ # class-inheritance which isn't yet a first-class schema relation.
440
+ graph_edges = []
441
+ for u, v, data in G.edges(data=True):
442
+ if u not in _id_remap or v not in _id_remap:
443
+ continue
444
+ graphify_relation = data.get('relation', '')
445
+ if graphify_relation == 'calls':
446
+ relation = 'calls'
447
+ elif graphify_relation in ('imports', 'imports_from'):
448
+ relation = 'imports_from'
449
+ elif graphify_relation == 'method':
450
+ relation = 'method'
451
+ else:
452
+ continue
453
+ edge_obj = {
454
+ 'from': _id_remap[u],
455
+ 'to': _id_remap[v],
456
+ 'relation': relation,
457
+ }
458
+ graph_edges.append(edge_obj)
459
+
460
+ # Communities: for each cluster compute dominantSourceDir + dominantPack.
461
+ # dominantSourceDir = most common ancestor directory (the longest
462
+ # leading-segment path that >= 40% of members share); empty string when
463
+ # no clear dominant. dominantPack = most common pack id among member
464
+ # files' extensions; empty when no dominant pack.
465
+ def _ancestor_dir(rel_path):
466
+ if not rel_path or '/' not in rel_path:
467
+ return ''
468
+ return rel_path.rsplit('/', 1)[0] + '/'
469
+
470
+ graph_communities = []
471
+ # Graphify's cluster() returns dict[community_id: list[node_id]].
472
+ # Iterate via .items(); the community_id is the actual cluster
473
+ # identifier (used to look up cohesion in scores), members is the
474
+ # node-id list.
475
+ _node_attrs_by_id = dict(nodes)
476
+ for cidx, member_list in communities.items():
477
+ member_ids = sorted(_id_remap.get(n, '') for n in member_list if n in _id_remap)
478
+ member_ids = [m for m in member_ids if m]
479
+ if not member_ids:
480
+ continue
481
+ # Per-member source files (project-relative)
482
+ member_files = []
483
+ for nid in member_list:
484
+ if nid in _id_remap:
485
+ sf = _rel(_node_attrs_by_id.get(nid, {}).get('source_file', ''))
486
+ if sf:
487
+ member_files.append(sf)
488
+ # Dominant directory: longest common ancestor that >= 40% of
489
+ # members share (or empty if no clear winner).
490
+ dir_counter = Counter(_ancestor_dir(f) for f in member_files)
491
+ dir_counter.pop('', None)
492
+ dominant_dir = ''
493
+ if dir_counter:
494
+ top_dir, top_count = dir_counter.most_common(1)[0]
495
+ if top_count / len(member_files) >= 0.4:
496
+ dominant_dir = top_dir
497
+ # Dominant pack
498
+ pack_counter = Counter()
499
+ for f in member_files:
500
+ pk = _EXT_TO_PACK.get(_ext_of(f))
501
+ if pk:
502
+ pack_counter[pk] += 1
503
+ dominant_pack = ''
504
+ if pack_counter:
505
+ top_pack, top_pack_count = pack_counter.most_common(1)[0]
506
+ if top_pack_count / max(1, len(member_files)) >= 0.5:
507
+ dominant_pack = top_pack
508
+ cohesion = float(scores.get(cidx, 0.0)) if scores else 0.0
509
+ graph_communities.append({
510
+ 'id': cidx,
511
+ 'nodeIds': member_ids,
512
+ 'cohesion': round(cohesion, 3),
513
+ 'dominantSourceDir': dominant_dir,
514
+ 'dominantPack': dominant_pack,
515
+ })
516
+
517
+ # Symbol index: lowercased label (without trailing ()) → list of nodeIds.
518
+ _symbol_index = {}
519
+ for node_obj in graph_nodes:
520
+ key = _strip_paren_suffix(node_obj['label']).lower()
521
+ if not key:
522
+ continue
523
+ _symbol_index.setdefault(key, []).append(node_obj['id'])
524
+
525
+ # Active-pack detection: derive from extensions seen in source files.
526
+ _packs_seen = sorted({_EXT_TO_PACK[e] for e in (_ext_of(_rel(d.get('source_file', '')))
527
+ for _, d in nodes)
528
+ if e in _EXT_TO_PACK})
529
+
530
+ # Size-budget enforcement. Hard cap 50MB serialized. If we exceed,
531
+ # drop method edges first (densest class — structural noise, doesn't
532
+ # affect call-graph queries).
533
+ import datetime as _dt
534
+ _meta = {
535
+ 'tool': 'graphify',
536
+ 'graphifyVersion': '', # filled by TS-side post-parse (read from graphifyy package version)
537
+ 'dxkitVersion': '', # filled by TS-side post-parse (read from package.json)
538
+ 'generatedAt': _dt.datetime.now(_dt.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
539
+ 'sourceFilesInGraph': total_src,
540
+ 'excludedFileCount': len(all_files) - len(files),
541
+ 'packs': _packs_seen,
542
+ 'truncated': False,
543
+ 'truncatedReason': '',
446
544
  }
447
- graph_edges.append(edge_obj)
448
-
449
- # Communities: for each cluster compute dominantSourceDir + dominantPack.
450
- # dominantSourceDir = most common ancestor directory (the longest
451
- # leading-segment path that >= 40% of members share); empty string when
452
- # no clear dominant. dominantPack = most common pack id among member
453
- # files' extensions; empty when no dominant pack.
454
- def _ancestor_dir(rel_path):
455
- if not rel_path or '/' not in rel_path:
456
- return ''
457
- return rel_path.rsplit('/', 1)[0] + '/'
458
-
459
- graph_communities = []
460
- # Graphify's cluster() returns dict[community_id: list[node_id]].
461
- # Iterate via .items(); the community_id is the actual cluster
462
- # identifier (used to look up cohesion in scores), members is the
463
- # node-id list.
464
- _node_attrs_by_id = dict(nodes)
465
- for cidx, member_list in communities.items():
466
- member_ids = sorted(_id_remap.get(n, '') for n in member_list if n in _id_remap)
467
- member_ids = [m for m in member_ids if m]
468
- if not member_ids:
469
- continue
470
- # Per-member source files (project-relative)
471
- member_files = []
472
- for nid in member_list:
473
- if nid in _id_remap:
474
- sf = _rel(_node_attrs_by_id.get(nid, {}).get('source_file', ''))
475
- if sf:
476
- member_files.append(sf)
477
- # Dominant directory: longest common ancestor that >= 40% of
478
- # members share (or empty if no clear winner).
479
- dir_counter = Counter(_ancestor_dir(f) for f in member_files)
480
- dir_counter.pop('', None)
481
- dominant_dir = ''
482
- if dir_counter:
483
- top_dir, top_count = dir_counter.most_common(1)[0]
484
- if top_count / len(member_files) >= 0.4:
485
- dominant_dir = top_dir
486
- # Dominant pack
487
- pack_counter = Counter()
488
- for f in member_files:
489
- pk = _EXT_TO_PACK.get(_ext_of(f))
490
- if pk:
491
- pack_counter[pk] += 1
492
- dominant_pack = ''
493
- if pack_counter:
494
- top_pack, top_pack_count = pack_counter.most_common(1)[0]
495
- if top_pack_count / max(1, len(member_files)) >= 0.5:
496
- dominant_pack = top_pack
497
- cohesion = float(scores.get(cidx, 0.0)) if scores else 0.0
498
- graph_communities.append({
499
- 'id': cidx,
500
- 'nodeIds': member_ids,
501
- 'cohesion': round(cohesion, 3),
502
- 'dominantSourceDir': dominant_dir,
503
- 'dominantPack': dominant_pack,
504
- })
505
-
506
- # Symbol index: lowercased label (without trailing ()) → list of nodeIds.
507
- _symbol_index = {}
508
- for node_obj in graph_nodes:
509
- key = _strip_paren_suffix(node_obj['label']).lower()
510
- if not key:
511
- continue
512
- _symbol_index.setdefault(key, []).append(node_obj['id'])
513
-
514
- # Active-pack detection: derive from extensions seen in source files.
515
- _packs_seen = sorted({_EXT_TO_PACK[e] for e in (_ext_of(_rel(d.get('source_file', '')))
516
- for _, d in nodes)
517
- if e in _EXT_TO_PACK})
518
-
519
- # Size-budget enforcement. Hard cap 50MB serialized. If we exceed,
520
- # drop method edges first (densest class — structural noise, doesn't
521
- # affect call-graph queries).
522
- import datetime as _dt
523
- _meta = {
524
- 'tool': 'graphify',
525
- 'graphifyVersion': '', # filled by TS-side post-parse (read from graphifyy package version)
526
- 'dxkitVersion': '', # filled by TS-side post-parse (read from package.json)
527
- 'generatedAt': _dt.datetime.now(_dt.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
528
- 'sourceFilesInGraph': total_src,
529
- 'excludedFileCount': len(all_files) - len(files),
530
- 'packs': _packs_seen,
531
- 'truncated': False,
532
- 'truncatedReason': '',
533
- }
534
545
 
535
- _graph_payload = {
536
- 'schemaVersion': 1,
537
- 'meta': _meta,
538
- 'nodes': graph_nodes,
539
- 'edges': graph_edges,
540
- 'communities': graph_communities,
541
- 'symbolIndex': _symbol_index,
542
- }
546
+ _graph_payload = {
547
+ 'schemaVersion': 1,
548
+ 'meta': _meta,
549
+ 'nodes': graph_nodes,
550
+ 'edges': graph_edges,
551
+ 'communities': graph_communities,
552
+ 'symbolIndex': _symbol_index,
553
+ }
543
554
 
544
- # Cheap pre-check on size: serialize once, measure, drop method edges
545
- # if over the cap, re-serialize. The 50MB cap matches the schema
546
- # contract; 10MB soft target is informational only (no enforcement).
547
- _BYTES_HARD_CAP = 50 * 1024 * 1024
548
-
549
- def _serialize(payload):
550
- return json.dumps(payload, separators=(',', ':'))
551
-
552
- _graph_json = _serialize(_graph_payload)
553
- if len(_graph_json.encode('utf-8')) > _BYTES_HARD_CAP:
554
- # Drop method edges first; they're structural (class-owns-method),
555
- # not behavioral. Call + import edges carry the actionable info.
556
- pre_count = len(_graph_payload['edges'])
557
- _graph_payload['edges'] = [e for e in _graph_payload['edges']
558
- if e['relation'] != 'method']
559
- post_count = len(_graph_payload['edges'])
560
- _meta['truncated'] = True
561
- _meta['truncatedReason'] = (
562
- f"dropped {pre_count - post_count} method edges to fit under "
563
- f"the 50MB hard cap"
564
- )
565
-
566
- # Render the interactive viewer alongside graph.json so the dashboard
567
- # Graph tab can embed it. graphify ships its own vis.js-based renderer
568
- # (graphify.export.to_html). Two emission paths:
569
- #
570
- # - Full graph (G.number_of_nodes() <= MAX_NODES_FOR_VIZ = 5000):
571
- # pass the original G + communities. The viewer renders every
572
- # symbol; the user can zoom + drill.
573
- #
574
- # - Aggregated community view (G > MAX_NODES_FOR_VIZ): build a
575
- # networkx super-graph whose nodes ARE the communities. Sized by
576
- # member count via graphify member_counts parameter. Inter-
577
- # community edges aggregated to weighted edges. This lets a
578
- # customer-scale repo still get a meaningful "what does this
579
- # codebase look like" viz instead of a dead empty-state.
580
- #
581
- # Either way failures are non-fatal: the dashboard surfaces a clear
582
- # empty-state when graph.html isn't on disk.
583
- try:
584
- from graphify.export import to_html as _to_html, MAX_NODES_FOR_VIZ as _MAX_VIZ
585
- import networkx as _nx
586
- _html_dir = target / '.dxkit' / 'reports'
587
- _html_dir.mkdir(parents=True, exist_ok=True)
588
- _html_path = _html_dir / 'graph.html'
589
-
590
- if G.number_of_nodes() <= _MAX_VIZ:
591
- _labels = {
592
- c['id']: (c.get('dominantSourceDir') or f"community-{c['id']}")
593
- for c in graph_communities
594
- }
595
- _to_html(G, communities, str(_html_path), community_labels=_labels)
596
- _viz_mode = 'full'
597
- else:
598
- # Aggregated community super-graph.
599
- _node_to_comm = {}
600
- for _cid, _members in communities.items():
601
- for _nid in _members:
602
- _node_to_comm[_nid] = _cid
603
-
604
- _G_agg = _nx.DiGraph()
605
- _member_counts = {}
606
- _labels = {}
607
- for _c in graph_communities:
608
- _cid = _c['id']
609
- _label = _c.get('dominantSourceDir') or f"community-{_cid}"
610
- # vis.js node attrs: label drives display; file_type is
611
- # surfaced in graphify's sidebar so we set a sentinel
612
- # value the dashboard can grep on.
613
- _G_agg.add_node(_cid, label=_label, source_file='', file_type='community')
614
- _member_counts[_cid] = len(_c['nodeIds'])
615
- _labels[_cid] = _label
616
-
617
- # Cross-community edge aggregation. Counter keyed on
618
- # (smaller_id, larger_id) for undirected aggregation; we then
619
- # add a directed edge in one canonical direction so vis.js
620
- # has a definite source/target. The viewer doesn't show
621
- # arrows on these (they're community connections, not calls).
622
- from collections import Counter as _CommCounter
623
- _edge_w = _CommCounter()
624
- for _u, _v, _ in G.edges(data=True):
625
- _cu = _node_to_comm.get(_u)
626
- _cv = _node_to_comm.get(_v)
627
- if _cu is None or _cv is None or _cu == _cv:
628
- continue
629
- _key = (_cu, _cv) if _cu < _cv else (_cv, _cu)
630
- _edge_w[_key] += 1
631
- for (_a, _b), _w in _edge_w.items():
632
- _G_agg.add_edge(_a, _b, relation='inter_community', occurrences=_w)
633
-
634
- # to_html requires a communities dict; one-element groups
635
- # treat each aggregated node as its own community so each
636
- # community keeps a distinct color in graphify's palette.
637
- _agg_groups = {_cid: [_cid] for _cid in communities}
638
-
639
- _to_html(
640
- _G_agg, _agg_groups, str(_html_path),
641
- community_labels=_labels, member_counts=_member_counts,
555
+ # Cheap pre-check on size: serialize once, measure, drop method edges
556
+ # if over the cap, re-serialize. The 50MB cap matches the schema
557
+ # contract; 10MB soft target is informational only (no enforcement).
558
+ _BYTES_HARD_CAP = 50 * 1024 * 1024
559
+
560
+ def _serialize(payload):
561
+ return json.dumps(payload, separators=(',', ':'))
562
+
563
+ _graph_json = _serialize(_graph_payload)
564
+ if len(_graph_json.encode('utf-8')) > _BYTES_HARD_CAP:
565
+ # Drop method edges first; they're structural (class-owns-method),
566
+ # not behavioral. Call + import edges carry the actionable info.
567
+ pre_count = len(_graph_payload['edges'])
568
+ _graph_payload['edges'] = [e for e in _graph_payload['edges']
569
+ if e['relation'] != 'method']
570
+ post_count = len(_graph_payload['edges'])
571
+ _meta['truncated'] = True
572
+ _meta['truncatedReason'] = (
573
+ f"dropped {pre_count - post_count} method edges to fit under "
574
+ f"the 50MB hard cap"
642
575
  )
643
- _viz_mode = 'aggregated'
644
-
645
- # Sidecar so the dashboard renderer can label the view honestly.
646
- # JSON is tiny (~120B); avoids parsing graph.json twice from TS.
647
- _meta_path = _html_dir / 'graph.html.meta.json'
648
- _meta_path.write_text(json.dumps({
649
- 'mode': _viz_mode,
650
- 'totalNodes': G.number_of_nodes(),
651
- 'totalEdges': G.number_of_edges(),
652
- 'communities': len(communities),
653
- 'aggregatedNodeCount': len(communities) if _viz_mode == 'aggregated' else None,
576
+
577
+ # Render the interactive viewer alongside graph.json so the dashboard
578
+ # Graph tab can embed it. graphify ships its own vis.js-based renderer
579
+ # (graphify.export.to_html). Two emission paths:
580
+ #
581
+ # - Full graph (G.number_of_nodes() <= MAX_NODES_FOR_VIZ = 5000):
582
+ # pass the original G + communities. The viewer renders every
583
+ # symbol; the user can zoom + drill.
584
+ #
585
+ # - Aggregated community view (G > MAX_NODES_FOR_VIZ): build a
586
+ # networkx super-graph whose nodes ARE the communities. Sized by
587
+ # member count via graphify member_counts parameter. Inter-
588
+ # community edges aggregated to weighted edges. This lets a
589
+ # customer-scale repo still get a meaningful "what does this
590
+ # codebase look like" viz instead of a dead empty-state.
591
+ #
592
+ # Either way failures are non-fatal: the dashboard surfaces a clear
593
+ # empty-state when graph.html isn't on disk.
594
+ try:
595
+ from graphify.export import to_html as _to_html, MAX_NODES_FOR_VIZ as _MAX_VIZ
596
+ import networkx as _nx
597
+ _html_dir = target / '.dxkit' / 'reports'
598
+ _html_dir.mkdir(parents=True, exist_ok=True)
599
+ _html_path = _html_dir / 'graph.html'
600
+
601
+ if G.number_of_nodes() <= _MAX_VIZ:
602
+ _labels = {
603
+ c['id']: (c.get('dominantSourceDir') or f"community-{c['id']}")
604
+ for c in graph_communities
605
+ }
606
+ _to_html(G, communities, str(_html_path), community_labels=_labels)
607
+ _viz_mode = 'full'
608
+ else:
609
+ # Aggregated community super-graph.
610
+ _node_to_comm = {}
611
+ for _cid, _members in communities.items():
612
+ for _nid in _members:
613
+ _node_to_comm[_nid] = _cid
614
+
615
+ _G_agg = _nx.DiGraph()
616
+ _member_counts = {}
617
+ _labels = {}
618
+ for _c in graph_communities:
619
+ _cid = _c['id']
620
+ _label = _c.get('dominantSourceDir') or f"community-{_cid}"
621
+ # vis.js node attrs: label drives display; file_type is
622
+ # surfaced in graphify's sidebar so we set a sentinel
623
+ # value the dashboard can grep on.
624
+ _G_agg.add_node(_cid, label=_label, source_file='', file_type='community')
625
+ _member_counts[_cid] = len(_c['nodeIds'])
626
+ _labels[_cid] = _label
627
+
628
+ # Cross-community edge aggregation. Counter keyed on
629
+ # (smaller_id, larger_id) for undirected aggregation; we then
630
+ # add a directed edge in one canonical direction so vis.js
631
+ # has a definite source/target. The viewer doesn't show
632
+ # arrows on these (they're community connections, not calls).
633
+ from collections import Counter as _CommCounter
634
+ _edge_w = _CommCounter()
635
+ for _u, _v, _ in G.edges(data=True):
636
+ _cu = _node_to_comm.get(_u)
637
+ _cv = _node_to_comm.get(_v)
638
+ if _cu is None or _cv is None or _cu == _cv:
639
+ continue
640
+ _key = (_cu, _cv) if _cu < _cv else (_cv, _cu)
641
+ _edge_w[_key] += 1
642
+ for (_a, _b), _w in _edge_w.items():
643
+ _G_agg.add_edge(_a, _b, relation='inter_community', occurrences=_w)
644
+
645
+ # to_html requires a communities dict; one-element groups
646
+ # treat each aggregated node as its own community so each
647
+ # community keeps a distinct color in graphify's palette.
648
+ _agg_groups = {_cid: [_cid] for _cid in communities}
649
+
650
+ _to_html(
651
+ _G_agg, _agg_groups, str(_html_path),
652
+ community_labels=_labels, member_counts=_member_counts,
653
+ )
654
+ _viz_mode = 'aggregated'
655
+
656
+ # Sidecar so the dashboard renderer can label the view honestly.
657
+ # JSON is tiny (~120B); avoids parsing graph.json twice from TS.
658
+ _meta_path = _html_dir / 'graph.html.meta.json'
659
+ _meta_path.write_text(json.dumps({
660
+ 'mode': _viz_mode,
661
+ 'totalNodes': G.number_of_nodes(),
662
+ 'totalEdges': G.number_of_edges(),
663
+ 'communities': len(communities),
664
+ 'aggregatedNodeCount': len(communities) if _viz_mode == 'aggregated' else None,
665
+ }))
666
+ except Exception as _html_err:
667
+ sys.stderr.write(f"dxkit: graph.html not generated ({_html_err})\\n")
668
+
669
+ print(json.dumps({
670
+ "functionCount": len(functions),
671
+ "classCount": len([n for n, d in modules if any(
672
+ data.get("relation") == "method" for _, _, data in G.edges(n, data=True)
673
+ )]),
674
+ "maxFunctionsInFile": max_file[1] if max_file else 0,
675
+ "maxFunctionsFilePath": str(max_file[0]) if max_file else "",
676
+ "godNodeCount": god_count,
677
+ "communityCount": len(communities),
678
+ "avgCohesion": round(avg_cohesion, 3),
679
+ "orphanModuleCount": len(orphans),
680
+ "deadImportCount": len(dead),
681
+ "commentedCodeRatio": round(commented_ratio, 3),
682
+ "sourceFilesInGraph": total_src,
683
+ "graph": _graph_payload,
654
684
  }))
655
- except Exception as _html_err:
656
- sys.stderr.write(f"dxkit: graph.html not generated ({_html_err})\\n")
657
-
658
- # Clean up temp cache
659
- import shutil
660
- shutil.rmtree(str(_cache_dir), ignore_errors=True)
661
-
662
- print(json.dumps({
663
- "functionCount": len(functions),
664
- "classCount": len([n for n, d in modules if any(
665
- data.get("relation") == "method" for _, _, data in G.edges(n, data=True)
666
- )]),
667
- "maxFunctionsInFile": max_file[1] if max_file else 0,
668
- "maxFunctionsFilePath": str(max_file[0]) if max_file else "",
669
- "godNodeCount": god_count,
670
- "communityCount": len(communities),
671
- "avgCohesion": round(avg_cohesion, 3),
672
- "orphanModuleCount": len(orphans),
673
- "deadImportCount": len(dead),
674
- "commentedCodeRatio": round(commented_ratio, 3),
675
- "sourceFilesInGraph": total_src,
676
- "graph": _graph_payload,
677
- }))
678
685
  `;
679
686
  }
680
687
  /**
@@ -781,6 +788,15 @@ async function computeAndCache(cwd) {
781
788
  // don't litter /tmp across runs.
782
789
  const scriptDir = fs.mkdtempSync(path.join(os.tmpdir(), 'dxkit-graphify-'));
783
790
  const scriptPath = path.join(scriptDir, 'run.py');
791
+ // graphify's on-disk AST cache is redirected here (passed to the script
792
+ // as argv[2] → extract(cache_root=...)), keeping it out of the target
793
+ // repo. It lives under scriptDir so the single `fs.rmSync(scriptDir)`
794
+ // below reclaims it — crucially AFTER the Python process and its atexit
795
+ // handlers exit. graphify flushes a stat-index via atexit
796
+ // (graphify/cache.py), so cleaning the cache from inside the script
797
+ // would be undone by that post-exit write; owning the lifecycle here is
798
+ // the only leak-free point.
799
+ const cacheDir = path.join(scriptDir, 'graphify-cache');
784
800
  fs.writeFileSync(scriptPath, buildGraphifyScript(cwd));
785
801
  // Spawn-with-process-group so the Python interpreter + any
786
802
  // tree-sitter worker subprocesses it starts are all killed
@@ -793,7 +809,7 @@ async function computeAndCache(cwd) {
793
809
  //
794
810
  // runDetached captures stderr natively so the tempfile redirect
795
811
  // pattern is no longer needed — same effect, fewer moving parts.
796
- const outcome = await (0, runner_1.runDetached)(pythonCmd, [scriptPath, cwd], {
812
+ const outcome = await (0, runner_1.runDetached)(pythonCmd, [scriptPath, cwd, cacheDir], {
797
813
  cwd: scriptDir,
798
814
  timeoutMs: 300000, // 5 min — bumped from 120000 in 2.4.7 for multi-thousand-file frontend repos
799
815
  });