@vyuhlabs/dxkit 2.9.4 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +236 -0
- package/dist/allowlist/annotate.d.ts +71 -0
- package/dist/allowlist/annotate.d.ts.map +1 -0
- package/dist/allowlist/annotate.js +105 -0
- package/dist/allowlist/annotate.js.map +1 -0
- package/dist/allowlist/cli.d.ts +29 -23
- package/dist/allowlist/cli.d.ts.map +1 -1
- package/dist/allowlist/cli.js +141 -70
- package/dist/allowlist/cli.js.map +1 -1
- package/dist/allowlist/file.d.ts +7 -1
- package/dist/allowlist/file.d.ts.map +1 -1
- package/dist/allowlist/file.js +7 -1
- package/dist/allowlist/file.js.map +1 -1
- package/dist/analysis-result.d.ts +10 -0
- package/dist/analysis-result.d.ts.map +1 -1
- package/dist/analyzers/cache.d.ts +1 -0
- package/dist/analyzers/cache.d.ts.map +1 -1
- package/dist/analyzers/cache.js +69 -0
- package/dist/analyzers/cache.js.map +1 -1
- package/dist/analyzers/dashboard/index.d.ts.map +1 -1
- package/dist/analyzers/dashboard/index.js +6 -1
- package/dist/analyzers/dashboard/index.js.map +1 -1
- package/dist/analyzers/health.d.ts.map +1 -1
- package/dist/analyzers/health.js +17 -2
- package/dist/analyzers/health.js.map +1 -1
- package/dist/analyzers/security/actions.d.ts.map +1 -1
- package/dist/analyzers/security/actions.js +13 -0
- package/dist/analyzers/security/actions.js.map +1 -1
- package/dist/analyzers/security/aggregator.d.ts +97 -79
- package/dist/analyzers/security/aggregator.d.ts.map +1 -1
- package/dist/analyzers/security/aggregator.js +168 -56
- package/dist/analyzers/security/aggregator.js.map +1 -1
- package/dist/analyzers/security/gather.d.ts +2 -0
- package/dist/analyzers/security/gather.d.ts.map +1 -1
- package/dist/analyzers/security/gather.js +36 -4
- package/dist/analyzers/security/gather.js.map +1 -1
- package/dist/analyzers/security/index.d.ts.map +1 -1
- package/dist/analyzers/security/index.js +81 -2
- package/dist/analyzers/security/index.js.map +1 -1
- package/dist/analyzers/security/scanner-drift.d.ts +21 -0
- package/dist/analyzers/security/scanner-drift.d.ts.map +1 -0
- package/dist/analyzers/security/scanner-drift.js +113 -0
- package/dist/analyzers/security/scanner-drift.js.map +1 -0
- package/dist/analyzers/security/shallow.d.ts.map +1 -1
- package/dist/analyzers/security/shallow.js +24 -2
- package/dist/analyzers/security/shallow.js.map +1 -1
- package/dist/analyzers/security/types.d.ts +64 -4
- package/dist/analyzers/security/types.d.ts.map +1 -1
- package/dist/analyzers/tools/fingerprint.d.ts +133 -20
- package/dist/analyzers/tools/fingerprint.d.ts.map +1 -1
- package/dist/analyzers/tools/fingerprint.js +194 -20
- package/dist/analyzers/tools/fingerprint.js.map +1 -1
- package/dist/analyzers/tools/gitleaks.d.ts +2 -2
- package/dist/analyzers/tools/gitleaks.d.ts.map +1 -1
- package/dist/analyzers/tools/gitleaks.js +7 -1
- package/dist/analyzers/tools/gitleaks.js.map +1 -1
- package/dist/analyzers/tools/graphify.d.ts +11 -0
- package/dist/analyzers/tools/graphify.d.ts.map +1 -1
- package/dist/analyzers/tools/graphify.js +457 -413
- package/dist/analyzers/tools/graphify.js.map +1 -1
- package/dist/analyzers/tools/grep-secrets.d.ts.map +1 -1
- package/dist/analyzers/tools/grep-secrets.js +31 -12
- package/dist/analyzers/tools/grep-secrets.js.map +1 -1
- package/dist/analyzers/tools/osv-scanner-fix.d.ts.map +1 -1
- package/dist/analyzers/tools/osv-scanner-fix.js +12 -1
- package/dist/analyzers/tools/osv-scanner-fix.js.map +1 -1
- package/dist/analyzers/tools/salt.d.ts +68 -0
- package/dist/analyzers/tools/salt.d.ts.map +1 -0
- package/dist/{baseline → analyzers/tools}/salt.js +59 -18
- package/dist/analyzers/tools/salt.js.map +1 -0
- package/dist/analyzers/tools/semgrep.d.ts +7 -7
- package/dist/analyzers/tools/semgrep.d.ts.map +1 -1
- package/dist/analyzers/tools/semgrep.js +14 -7
- package/dist/analyzers/tools/semgrep.js.map +1 -1
- package/dist/analyzers/tools/tool-registry.d.ts.map +1 -1
- package/dist/analyzers/tools/tool-registry.js +78 -43
- package/dist/analyzers/tools/tool-registry.js.map +1 -1
- package/dist/analyzers/tools/walk-source-files.d.ts +10 -0
- package/dist/analyzers/tools/walk-source-files.d.ts.map +1 -1
- package/dist/analyzers/tools/walk-source-files.js +14 -0
- package/dist/analyzers/tools/walk-source-files.js.map +1 -1
- package/dist/analyzers/types.d.ts +9 -0
- package/dist/analyzers/types.d.ts.map +1 -1
- package/dist/baseline/baseline-file.d.ts +9 -2
- package/dist/baseline/baseline-file.d.ts.map +1 -1
- package/dist/baseline/baseline-file.js.map +1 -1
- package/dist/baseline/check-renderers.d.ts.map +1 -1
- package/dist/baseline/check-renderers.js +14 -0
- package/dist/baseline/check-renderers.js.map +1 -1
- package/dist/baseline/check.d.ts +33 -0
- package/dist/baseline/check.d.ts.map +1 -1
- package/dist/baseline/check.js +78 -2
- package/dist/baseline/check.js.map +1 -1
- package/dist/baseline/create.d.ts +1 -1
- package/dist/baseline/create.d.ts.map +1 -1
- package/dist/baseline/create.js +3 -1
- package/dist/baseline/create.js.map +1 -1
- package/dist/baseline/entry-to-located.d.ts +12 -5
- package/dist/baseline/entry-to-located.d.ts.map +1 -1
- package/dist/baseline/entry-to-located.js +21 -7
- package/dist/baseline/entry-to-located.js.map +1 -1
- package/dist/baseline/finding-identity.d.ts +20 -13
- package/dist/baseline/finding-identity.d.ts.map +1 -1
- package/dist/baseline/finding-identity.js +51 -20
- package/dist/baseline/finding-identity.js.map +1 -1
- package/dist/baseline/git-aware-match.d.ts +7 -5
- package/dist/baseline/git-aware-match.d.ts.map +1 -1
- package/dist/baseline/git-aware-match.js +78 -5
- package/dist/baseline/git-aware-match.js.map +1 -1
- package/dist/baseline/migrate.d.ts +94 -0
- package/dist/baseline/migrate.d.ts.map +1 -0
- package/dist/baseline/migrate.js +238 -0
- package/dist/baseline/migrate.js.map +1 -0
- package/dist/baseline/producers/security.d.ts +9 -9
- package/dist/baseline/producers/security.d.ts.map +1 -1
- package/dist/baseline/producers/security.js +16 -4
- package/dist/baseline/producers/security.js.map +1 -1
- package/dist/baseline/types.d.ts +145 -95
- package/dist/baseline/types.d.ts.map +1 -1
- package/dist/baseline/types.js +30 -26
- package/dist/baseline/types.js.map +1 -1
- package/dist/explore/context-hook.d.ts +49 -29
- package/dist/explore/context-hook.d.ts.map +1 -1
- package/dist/explore/context-hook.js +304 -29
- package/dist/explore/context-hook.js.map +1 -1
- package/dist/explore/finding-context.d.ts +17 -0
- package/dist/explore/finding-context.d.ts.map +1 -1
- package/dist/explore/finding-context.js +34 -0
- package/dist/explore/finding-context.js.map +1 -1
- package/dist/explore/queries.d.ts +32 -15
- package/dist/explore/queries.d.ts.map +1 -1
- package/dist/explore/queries.js +36 -6
- package/dist/explore/queries.js.map +1 -1
- package/dist/generator.d.ts.map +1 -1
- package/dist/generator.js +13 -7
- package/dist/generator.js.map +1 -1
- package/dist/ingest/normalize.d.ts +1 -1
- package/dist/ingest/normalize.d.ts.map +1 -1
- package/dist/ingest/normalize.js +5 -1
- package/dist/ingest/normalize.js.map +1 -1
- package/dist/ingest/sarif.d.ts.map +1 -1
- package/dist/ingest/sarif.js +16 -7
- package/dist/ingest/sarif.js.map +1 -1
- package/dist/ingest/snyk-policy.d.ts +22 -1
- package/dist/ingest/snyk-policy.d.ts.map +1 -1
- package/dist/ingest/snyk-policy.js +75 -18
- package/dist/ingest/snyk-policy.js.map +1 -1
- package/dist/ingest/types.d.ts +23 -12
- package/dist/ingest/types.d.ts.map +1 -1
- package/dist/languages/capabilities/types.d.ts +64 -53
- package/dist/languages/capabilities/types.d.ts.map +1 -1
- package/dist/languages/capabilities/types.js +4 -4
- package/dist/languages/index.d.ts +28 -5
- package/dist/languages/index.d.ts.map +1 -1
- package/dist/languages/index.js +38 -7
- package/dist/languages/index.js.map +1 -1
- package/dist/languages/typescript.d.ts.map +1 -1
- package/dist/languages/typescript.js +19 -0
- package/dist/languages/typescript.js.map +1 -1
- package/dist/scoring/dimensions/security.d.ts +17 -0
- package/dist/scoring/dimensions/security.d.ts.map +1 -1
- package/dist/scoring/dimensions/security.js +12 -0
- package/dist/scoring/dimensions/security.js.map +1 -1
- package/dist/update.d.ts.map +1 -1
- package/dist/update.js +49 -0
- package/dist/update.js.map +1 -1
- package/dist/upgrade.d.ts.map +1 -1
- package/dist/upgrade.js +2 -1
- package/dist/upgrade.js.map +1 -1
- package/package.json +6 -3
- package/templates/.claude/skills/dxkit-action/SKILL.md +11 -2
- package/templates/.claude/skills/dxkit-allowlist/SKILL.md +9 -0
- package/templates/.claude/skills/dxkit-onboard/SKILL.md +2 -2
- package/templates/.claude/skills/dxkit-update/SKILL.md +45 -4
- package/dist/baseline/salt.d.ts +0 -45
- package/dist/baseline/salt.d.ts.map +0 -1
- package/dist/baseline/salt.js.map +0 -1
|
@@ -34,6 +34,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.graphifyProvider = void 0;
|
|
37
|
+
exports.buildGraphifyScript = buildGraphifyScript;
|
|
37
38
|
exports.gatherGraphifyResult = gatherGraphifyResult;
|
|
38
39
|
exports.gatherGraphifyGraph = gatherGraphifyGraph;
|
|
39
40
|
exports.buildGraphifyEnvelope = buildGraphifyEnvelope;
|
|
@@ -60,19 +61,42 @@ const path = __importStar(require("path"));
|
|
|
60
61
|
const runner_1 = require("./runner");
|
|
61
62
|
const tool_registry_1 = require("./tool-registry");
|
|
62
63
|
const exclusions_1 = require("./exclusions");
|
|
64
|
+
const languages_1 = require("../../languages");
|
|
63
65
|
const paths_1 = require("./paths");
|
|
64
66
|
const types_1 = require("../../explore/types");
|
|
65
|
-
/**
|
|
67
|
+
/**
|
|
68
|
+
* Build the graphify Python script with cwd-specific exclusions baked in.
|
|
69
|
+
*
|
|
70
|
+
* Exported so the structural contract of the generated script — the
|
|
71
|
+
* `if __name__ == '__main__'` guard that keeps ProcessPoolExecutor workers
|
|
72
|
+
* from re-running extraction under spawn/forkserver (Python 3.14's Linux
|
|
73
|
+
* default), and the public `extract(cache_root=...)` cache redirect that
|
|
74
|
+
* replaced the fragile `cache_dir` monkeypatch — is unit-testable without a
|
|
75
|
+
* Python interpreter or graphify installed (mirrors `buildGraphifyEnvelope`).
|
|
76
|
+
*/
|
|
66
77
|
function buildGraphifyScript(cwd) {
|
|
67
78
|
const { dirsSet, pathsList, fileGlobsList } = (0, exclusions_1.getPythonExcludeFilter)(cwd);
|
|
79
|
+
// Source-extension allowlist for the CODE graph. graphify's collect_files
|
|
80
|
+
// enumerates everything its _DISPATCH table can parse — including .md / .mdx
|
|
81
|
+
// (markdown headings → "module" nodes) and .json (config + lockfile keys →
|
|
82
|
+
// nodes). On NodeGoat that produced a graph that was ~92% non-code:
|
|
83
|
+
// package-lock.json alone contributed 137 nodes, .claude/**/*.md (dxkit's
|
|
84
|
+
// own scaffolding) 205, .vyuh-dxkit.json 53 — versus 51 nodes of real app
|
|
85
|
+
// code. Doc/config nodes pollute every graph-derived surface (communities,
|
|
86
|
+
// hot-files, api-surface, god-node ranking) and the context-hook's file
|
|
87
|
+
// summaries. Restrict the walk to the pack-declared source extensions
|
|
88
|
+
// (Rule 3/6: "what counts as source" is a language fact). graphify's TS
|
|
89
|
+
// import resolution reads tsconfig.json / package.json by direct path, not
|
|
90
|
+
// from the collected set, so dropping config files from the walk does not
|
|
91
|
+
// affect import-edge resolution.
|
|
92
|
+
const includeExtsSet = `set([${(0, languages_1.allSourceExtensions)()
|
|
93
|
+
.map((e) => `'${e.toLowerCase()}'`)
|
|
94
|
+
.join(', ')}])`;
|
|
68
95
|
return `# Exclusion set derived from src/analyzers/tools/exclusions.ts
|
|
69
|
-
import json, sys, os
|
|
96
|
+
import json, sys, os
|
|
70
97
|
from pathlib import Path
|
|
71
98
|
from collections import Counter
|
|
72
99
|
|
|
73
|
-
# Redirect graphify cache to /tmp so we don't pollute the target repo
|
|
74
|
-
_cache_dir = Path(tempfile.mkdtemp(prefix='dxkit-graphify-'))
|
|
75
|
-
|
|
76
100
|
try:
|
|
77
101
|
from graphify.extract import extract, collect_files
|
|
78
102
|
from graphify.build import build
|
|
@@ -82,17 +106,6 @@ except ImportError:
|
|
|
82
106
|
print(json.dumps({"error": "graphify not installed"}))
|
|
83
107
|
sys.exit(0)
|
|
84
108
|
|
|
85
|
-
# Redirect graphify's on-disk cache BEFORE any graphify function runs.
|
|
86
|
-
# collect_files() eagerly resolves cache_dir() during enumeration, so
|
|
87
|
-
# the patch has to land before the first graphify call — not after.
|
|
88
|
-
# Pre-patch, a 'graphify-out/cache/' directory was created in the
|
|
89
|
-
# customer's repo every time the analyzer touched a project.
|
|
90
|
-
import graphify.cache as _gc
|
|
91
|
-
_gc.cache_dir = lambda root=None: _cache_dir / "cache"
|
|
92
|
-
(_cache_dir / "cache").mkdir(parents=True, exist_ok=True)
|
|
93
|
-
|
|
94
|
-
target = Path(sys.argv[1])
|
|
95
|
-
|
|
96
109
|
# Three-axis exclusion. EXCLUDE_DIRS is basename-only (any path
|
|
97
110
|
# segment matching skips the file). EXCLUDE_PATHS holds multi-segment
|
|
98
111
|
# relative paths from .dxkit-ignore (e.g. 'app/modules/plugins/VendorPlugin')
|
|
@@ -106,6 +119,12 @@ EXCLUDE_DIRS = ${dirsSet}
|
|
|
106
119
|
EXCLUDE_PATHS = ${pathsList}
|
|
107
120
|
EXCLUDE_FILE_GLOBS = ${fileGlobsList}
|
|
108
121
|
|
|
122
|
+
# Source-extension allowlist (pack-declared via allSourceExtensions()).
|
|
123
|
+
# Keeps the CODE graph to actual source files — graphify also parses .md /
|
|
124
|
+
# .json into nodes, which is noise for code navigation. Empty set would be a
|
|
125
|
+
# bug (no files pass); the TS builder always emits a non-empty literal.
|
|
126
|
+
INCLUDE_EXTS = ${includeExtsSet}
|
|
127
|
+
|
|
109
128
|
# Bytes-per-line floor above which a file is almost certainly minified
|
|
110
129
|
# / bundled output. Mirrors the heuristic in
|
|
111
130
|
# src/analyzers/tools/minified-detection.ts so graphify's enumeration
|
|
@@ -132,6 +151,11 @@ def _is_likely_minified(f):
|
|
|
132
151
|
return False
|
|
133
152
|
|
|
134
153
|
def _is_excluded(f):
|
|
154
|
+
# Source-extension allowlist first: anything that isn't a pack-declared
|
|
155
|
+
# source file (markdown, JSON config, lockfiles, plain text) is not part
|
|
156
|
+
# of the code graph.
|
|
157
|
+
if f.suffix.lower() not in INCLUDE_EXTS:
|
|
158
|
+
return True
|
|
135
159
|
if any(seg in EXCLUDE_DIRS for seg in f.parts):
|
|
136
160
|
return True
|
|
137
161
|
name = f.name
|
|
@@ -274,407 +298,418 @@ def _strip_paren_suffix(label):
|
|
|
274
298
|
s = s.rsplit('.', 1)[1]
|
|
275
299
|
return s
|
|
276
300
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
#
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
#
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
#
|
|
386
|
-
#
|
|
387
|
-
#
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
#
|
|
422
|
-
#
|
|
423
|
-
#
|
|
424
|
-
|
|
425
|
-
#
|
|
426
|
-
#
|
|
427
|
-
#
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
301
|
+
if __name__ == '__main__':
|
|
302
|
+
# ProcessPoolExecutor workers re-import this module under spawn/
|
|
303
|
+
# forkserver (the Python 3.14 default on Linux); the __main__ guard
|
|
304
|
+
# keeps extraction from re-running per worker. graphify's own
|
|
305
|
+
# _extract_parallel requires this guard (it warns BrokenProcessPool
|
|
306
|
+
# and dies without it). See graphify/extract.py:_extract_parallel.
|
|
307
|
+
target = Path(sys.argv[1])
|
|
308
|
+
# graphify's on-disk cache is redirected here (the public cache_root
|
|
309
|
+
# param passed to extract() below) so it never lands in the target
|
|
310
|
+
# repo. The TS caller owns this dir's lifecycle — it lives under the
|
|
311
|
+
# ephemeral scriptDir and is removed after this process fully exits,
|
|
312
|
+
# which is the only point that survives graphify's atexit stat-index
|
|
313
|
+
# flush (graphify/cache.py registers _flush_stat_index at exit, so a
|
|
314
|
+
# Python-side rmtree here would be undone by that post-exit write).
|
|
315
|
+
_cache_dir = Path(sys.argv[2])
|
|
316
|
+
all_files = collect_files(target)
|
|
317
|
+
files = [f for f in all_files if not _is_excluded(f)]
|
|
318
|
+
if not files:
|
|
319
|
+
print(json.dumps({"error": "no files found"}))
|
|
320
|
+
sys.exit(0)
|
|
321
|
+
|
|
322
|
+
# Suppress progress output by redirecting stdout during extraction
|
|
323
|
+
import io
|
|
324
|
+
_real_stdout = sys.stdout
|
|
325
|
+
sys.stdout = io.StringIO()
|
|
326
|
+
result = extract(files, cache_root=_cache_dir)
|
|
327
|
+
sys.stdout = _real_stdout
|
|
328
|
+
G = build([result], directed=True)
|
|
329
|
+
communities = cluster(G)
|
|
330
|
+
|
|
331
|
+
# Functions vs modules
|
|
332
|
+
nodes = list(G.nodes(data=True))
|
|
333
|
+
functions = [(n, d) for n, d in nodes if "()" in d.get("label", "")]
|
|
334
|
+
modules = [(n, d) for n, d in nodes if "()" not in d.get("label", "")]
|
|
335
|
+
|
|
336
|
+
# Functions per file
|
|
337
|
+
file_funcs = Counter()
|
|
338
|
+
for n, d in functions:
|
|
339
|
+
sf = d.get("source_file", "")
|
|
340
|
+
file_funcs[sf] += 1
|
|
341
|
+
|
|
342
|
+
max_file = file_funcs.most_common(1)[0] if file_funcs else ("", 0)
|
|
343
|
+
|
|
344
|
+
# God nodes: graphifyy@0.5.0 renamed the result key "edges" → "degree".
|
|
345
|
+
gods = god_nodes(G, top_n=50)
|
|
346
|
+
god_count = sum(1 for g in gods if g["degree"] > 15)
|
|
347
|
+
|
|
348
|
+
# Cohesion
|
|
349
|
+
scores = score_all(G, communities) if communities else {}
|
|
350
|
+
avg_cohesion = sum(scores.values()) / len(scores) if scores else 0.0
|
|
351
|
+
|
|
352
|
+
# Orphan modules (no inbound imports)
|
|
353
|
+
import_targets = set()
|
|
354
|
+
for u, v, data in G.edges(data=True):
|
|
355
|
+
if data.get("relation") == "imports_from":
|
|
356
|
+
import_targets.add(v)
|
|
357
|
+
module_ids = set(n for n, d in modules)
|
|
358
|
+
orphans = module_ids - import_targets
|
|
359
|
+
|
|
360
|
+
# Dead imports (imported but never called)
|
|
361
|
+
call_targets = set()
|
|
362
|
+
for u, v, data in G.edges(data=True):
|
|
363
|
+
if data.get("relation") == "calls":
|
|
364
|
+
call_targets.add(v)
|
|
365
|
+
dead = import_targets - call_targets - module_ids
|
|
366
|
+
|
|
367
|
+
# Commented code ratio: source files with 0 function/class AST nodes
|
|
368
|
+
source_files_set = set()
|
|
369
|
+
files_with_nodes = set()
|
|
370
|
+
for n, d in nodes:
|
|
371
|
+
sf = d.get("source_file", "")
|
|
372
|
+
if sf:
|
|
373
|
+
source_files_set.add(sf)
|
|
374
|
+
if "()" in d.get("label", "") or any(
|
|
375
|
+
data.get("relation") == "method"
|
|
376
|
+
for _, _, data in G.edges(n, data=True)
|
|
377
|
+
):
|
|
378
|
+
files_with_nodes.add(sf)
|
|
379
|
+
|
|
380
|
+
total_src = len(source_files_set)
|
|
381
|
+
empty_files = total_src - len(files_with_nodes)
|
|
382
|
+
commented_ratio = empty_files / total_src if total_src > 0 else 0.0
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
# ── Build the full graph artifact ────────────────────────────────────────────
|
|
386
|
+
# 2.7 Sprint 1: emit nodes / edges / communities / symbolIndex alongside
|
|
387
|
+
# the aggregate metrics. Consumers (explore CLI, dashboard viz, future
|
|
388
|
+
# 2.8 context CLI + reachability) read this via src/explore/load.ts.
|
|
389
|
+
# Schema contract documented in tmp/2.7-graph-json-schema.md.
|
|
390
|
+
|
|
391
|
+
# Determine class membership: a module-shaped node is a CLASS if it has
|
|
392
|
+
# outbound 'method' edges to other nodes (it's the owner). A function-
|
|
393
|
+
# shaped node ("()" in label) is a METHOD if it has inbound 'method'
|
|
394
|
+
# edges from a class node; otherwise it's a free FUNCTION.
|
|
395
|
+
_class_owners = set()
|
|
396
|
+
_method_members = set()
|
|
397
|
+
for u, v, data in G.edges(data=True):
|
|
398
|
+
if data.get("relation") == "method":
|
|
399
|
+
_class_owners.add(u)
|
|
400
|
+
_method_members.add(v)
|
|
401
|
+
|
|
402
|
+
def _node_kind(nid, attrs):
|
|
403
|
+
label = attrs.get('label', '')
|
|
404
|
+
is_callable = '()' in label
|
|
405
|
+
if is_callable:
|
|
406
|
+
return 'method' if nid in _method_members else 'function'
|
|
407
|
+
return 'class' if nid in _class_owners else 'module'
|
|
408
|
+
|
|
409
|
+
# Make node sourceFile paths project-relative (graphify emits absolute
|
|
410
|
+
# paths derived from \`target = sys.argv[1]\`). Mirrors the existing
|
|
411
|
+
# maxFunctionsFilePath path-normalization at the TS layer.
|
|
412
|
+
def _rel(p):
|
|
413
|
+
if not p:
|
|
414
|
+
return ''
|
|
415
|
+
s = str(p).replace(os.sep, '/')
|
|
416
|
+
t = str(target).replace(os.sep, '/').rstrip('/')
|
|
417
|
+
if s.startswith(t + '/'):
|
|
418
|
+
return s[len(t) + 1:]
|
|
419
|
+
if s == t:
|
|
420
|
+
return ''
|
|
421
|
+
return s
|
|
422
|
+
|
|
423
|
+
# Assign stable in-run ids: n0, n1, n2, ... in extraction order. The
|
|
424
|
+
# graphify-internal id strings (long underscored slugs) work but bloat
|
|
425
|
+
# the JSON by ~20 bytes per node; the n<idx> shortening saves ~50KB on
|
|
426
|
+
# a 13k-node repo. IDs are NOT stable across runs (per schema doc).
|
|
427
|
+
_id_remap = {}
|
|
428
|
+
graph_nodes = []
|
|
429
|
+
for idx, (nid, attrs) in enumerate(nodes):
|
|
430
|
+
short_id = f'n{idx}'
|
|
431
|
+
_id_remap[nid] = short_id
|
|
432
|
+
line_no = _parse_line_no(attrs)
|
|
433
|
+
rel_source = _rel(attrs.get('source_file', ''))
|
|
434
|
+
label = attrs.get('label', '')
|
|
435
|
+
name = _strip_paren_suffix(label)
|
|
436
|
+
kind = _node_kind(nid, attrs)
|
|
437
|
+
node_obj = {
|
|
438
|
+
'id': short_id,
|
|
439
|
+
'kind': kind,
|
|
440
|
+
'label': label,
|
|
441
|
+
'sourceFile': rel_source,
|
|
442
|
+
}
|
|
443
|
+
if line_no:
|
|
444
|
+
node_obj['line'] = line_no
|
|
445
|
+
# Export detection only meaningful for symbol-bearing kinds
|
|
446
|
+
# (functions, classes, methods). Module-level "is this file
|
|
447
|
+
# exported?" isn't a useful question — exclude.
|
|
448
|
+
if kind in ('function', 'class', 'method'):
|
|
449
|
+
# Resolve to absolute path for the file-line cache (we read
|
|
450
|
+
# the raw source content; the cache key is the actual path
|
|
451
|
+
# on disk, not the project-relative form).
|
|
452
|
+
abs_source = attrs.get('source_file', '')
|
|
453
|
+
exported = _detect_exported(abs_source, line_no, name)
|
|
454
|
+
if exported is not None:
|
|
455
|
+
node_obj['exported'] = exported
|
|
456
|
+
graph_nodes.append(node_obj)
|
|
457
|
+
|
|
458
|
+
# Edges remapped to short ids. Drop self-loops and edges where either
|
|
459
|
+
# endpoint was filtered out (defensive — graphify shouldn't produce them
|
|
460
|
+
# but be tolerant). Graphify emits both 'imports' (broad form: \`import X\`)
|
|
461
|
+
# and 'imports_from' (\`from X import Y\` / \`import {Y} from X\`); both
|
|
462
|
+
# carry the same semantic for our schema ("A imports from B"). Merge
|
|
463
|
+
# both into the canonical 'imports_from' edge relation. The 'contains'
|
|
464
|
+
# and 'inherits' relations graphify also produces are intentionally
|
|
465
|
+
# dropped — 'contains' duplicates the file/symbol-membership info
|
|
466
|
+
# already encoded in nodes' sourceFile field, and 'inherits' is
|
|
467
|
+
# class-inheritance which isn't yet a first-class schema relation.
|
|
468
|
+
graph_edges = []
|
|
469
|
+
for u, v, data in G.edges(data=True):
|
|
470
|
+
if u not in _id_remap or v not in _id_remap:
|
|
471
|
+
continue
|
|
472
|
+
graphify_relation = data.get('relation', '')
|
|
473
|
+
if graphify_relation == 'calls':
|
|
474
|
+
relation = 'calls'
|
|
475
|
+
elif graphify_relation in ('imports', 'imports_from'):
|
|
476
|
+
relation = 'imports_from'
|
|
477
|
+
elif graphify_relation == 'method':
|
|
478
|
+
relation = 'method'
|
|
479
|
+
else:
|
|
480
|
+
continue
|
|
481
|
+
edge_obj = {
|
|
482
|
+
'from': _id_remap[u],
|
|
483
|
+
'to': _id_remap[v],
|
|
484
|
+
'relation': relation,
|
|
485
|
+
}
|
|
486
|
+
graph_edges.append(edge_obj)
|
|
487
|
+
|
|
488
|
+
# Communities: for each cluster compute dominantSourceDir + dominantPack.
|
|
489
|
+
# dominantSourceDir = most common ancestor directory (the longest
|
|
490
|
+
# leading-segment path that >= 40% of members share); empty string when
|
|
491
|
+
# no clear dominant. dominantPack = most common pack id among member
|
|
492
|
+
# files' extensions; empty when no dominant pack.
|
|
493
|
+
def _ancestor_dir(rel_path):
|
|
494
|
+
if not rel_path or '/' not in rel_path:
|
|
495
|
+
return ''
|
|
496
|
+
return rel_path.rsplit('/', 1)[0] + '/'
|
|
497
|
+
|
|
498
|
+
graph_communities = []
|
|
499
|
+
# Graphify's cluster() returns dict[community_id: list[node_id]].
|
|
500
|
+
# Iterate via .items(); the community_id is the actual cluster
|
|
501
|
+
# identifier (used to look up cohesion in scores), members is the
|
|
502
|
+
# node-id list.
|
|
503
|
+
_node_attrs_by_id = dict(nodes)
|
|
504
|
+
for cidx, member_list in communities.items():
|
|
505
|
+
member_ids = sorted(_id_remap.get(n, '') for n in member_list if n in _id_remap)
|
|
506
|
+
member_ids = [m for m in member_ids if m]
|
|
507
|
+
if not member_ids:
|
|
508
|
+
continue
|
|
509
|
+
# Per-member source files (project-relative)
|
|
510
|
+
member_files = []
|
|
511
|
+
for nid in member_list:
|
|
512
|
+
if nid in _id_remap:
|
|
513
|
+
sf = _rel(_node_attrs_by_id.get(nid, {}).get('source_file', ''))
|
|
514
|
+
if sf:
|
|
515
|
+
member_files.append(sf)
|
|
516
|
+
# Dominant directory: longest common ancestor that >= 40% of
|
|
517
|
+
# members share (or empty if no clear winner).
|
|
518
|
+
dir_counter = Counter(_ancestor_dir(f) for f in member_files)
|
|
519
|
+
dir_counter.pop('', None)
|
|
520
|
+
dominant_dir = ''
|
|
521
|
+
if dir_counter:
|
|
522
|
+
top_dir, top_count = dir_counter.most_common(1)[0]
|
|
523
|
+
if top_count / len(member_files) >= 0.4:
|
|
524
|
+
dominant_dir = top_dir
|
|
525
|
+
# Dominant pack
|
|
526
|
+
pack_counter = Counter()
|
|
527
|
+
for f in member_files:
|
|
528
|
+
pk = _EXT_TO_PACK.get(_ext_of(f))
|
|
529
|
+
if pk:
|
|
530
|
+
pack_counter[pk] += 1
|
|
531
|
+
dominant_pack = ''
|
|
532
|
+
if pack_counter:
|
|
533
|
+
top_pack, top_pack_count = pack_counter.most_common(1)[0]
|
|
534
|
+
if top_pack_count / max(1, len(member_files)) >= 0.5:
|
|
535
|
+
dominant_pack = top_pack
|
|
536
|
+
cohesion = float(scores.get(cidx, 0.0)) if scores else 0.0
|
|
537
|
+
graph_communities.append({
|
|
538
|
+
'id': cidx,
|
|
539
|
+
'nodeIds': member_ids,
|
|
540
|
+
'cohesion': round(cohesion, 3),
|
|
541
|
+
'dominantSourceDir': dominant_dir,
|
|
542
|
+
'dominantPack': dominant_pack,
|
|
543
|
+
})
|
|
544
|
+
|
|
545
|
+
# Symbol index: lowercased label (without trailing ()) → list of nodeIds.
|
|
546
|
+
_symbol_index = {}
|
|
547
|
+
for node_obj in graph_nodes:
|
|
548
|
+
key = _strip_paren_suffix(node_obj['label']).lower()
|
|
549
|
+
if not key:
|
|
550
|
+
continue
|
|
551
|
+
_symbol_index.setdefault(key, []).append(node_obj['id'])
|
|
552
|
+
|
|
553
|
+
# Active-pack detection: derive from extensions seen in source files.
|
|
554
|
+
_packs_seen = sorted({_EXT_TO_PACK[e] for e in (_ext_of(_rel(d.get('source_file', '')))
|
|
555
|
+
for _, d in nodes)
|
|
556
|
+
if e in _EXT_TO_PACK})
|
|
557
|
+
|
|
558
|
+
# Size-budget enforcement. Hard cap 50MB serialized. If we exceed,
|
|
559
|
+
# drop method edges first (densest class — structural noise, doesn't
|
|
560
|
+
# affect call-graph queries).
|
|
561
|
+
import datetime as _dt
|
|
562
|
+
_meta = {
|
|
563
|
+
'tool': 'graphify',
|
|
564
|
+
'graphifyVersion': '', # filled by TS-side post-parse (read from graphifyy package version)
|
|
565
|
+
'dxkitVersion': '', # filled by TS-side post-parse (read from package.json)
|
|
566
|
+
'generatedAt': _dt.datetime.now(_dt.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
|
|
567
|
+
'sourceFilesInGraph': total_src,
|
|
568
|
+
'excludedFileCount': len(all_files) - len(files),
|
|
569
|
+
'packs': _packs_seen,
|
|
570
|
+
'truncated': False,
|
|
571
|
+
'truncatedReason': '',
|
|
446
572
|
}
|
|
447
|
-
graph_edges.append(edge_obj)
|
|
448
|
-
|
|
449
|
-
# Communities: for each cluster compute dominantSourceDir + dominantPack.
|
|
450
|
-
# dominantSourceDir = most common ancestor directory (the longest
|
|
451
|
-
# leading-segment path that >= 40% of members share); empty string when
|
|
452
|
-
# no clear dominant. dominantPack = most common pack id among member
|
|
453
|
-
# files' extensions; empty when no dominant pack.
|
|
454
|
-
def _ancestor_dir(rel_path):
|
|
455
|
-
if not rel_path or '/' not in rel_path:
|
|
456
|
-
return ''
|
|
457
|
-
return rel_path.rsplit('/', 1)[0] + '/'
|
|
458
|
-
|
|
459
|
-
graph_communities = []
|
|
460
|
-
# Graphify's cluster() returns dict[community_id: list[node_id]].
|
|
461
|
-
# Iterate via .items(); the community_id is the actual cluster
|
|
462
|
-
# identifier (used to look up cohesion in scores), members is the
|
|
463
|
-
# node-id list.
|
|
464
|
-
_node_attrs_by_id = dict(nodes)
|
|
465
|
-
for cidx, member_list in communities.items():
|
|
466
|
-
member_ids = sorted(_id_remap.get(n, '') for n in member_list if n in _id_remap)
|
|
467
|
-
member_ids = [m for m in member_ids if m]
|
|
468
|
-
if not member_ids:
|
|
469
|
-
continue
|
|
470
|
-
# Per-member source files (project-relative)
|
|
471
|
-
member_files = []
|
|
472
|
-
for nid in member_list:
|
|
473
|
-
if nid in _id_remap:
|
|
474
|
-
sf = _rel(_node_attrs_by_id.get(nid, {}).get('source_file', ''))
|
|
475
|
-
if sf:
|
|
476
|
-
member_files.append(sf)
|
|
477
|
-
# Dominant directory: longest common ancestor that >= 40% of
|
|
478
|
-
# members share (or empty if no clear winner).
|
|
479
|
-
dir_counter = Counter(_ancestor_dir(f) for f in member_files)
|
|
480
|
-
dir_counter.pop('', None)
|
|
481
|
-
dominant_dir = ''
|
|
482
|
-
if dir_counter:
|
|
483
|
-
top_dir, top_count = dir_counter.most_common(1)[0]
|
|
484
|
-
if top_count / len(member_files) >= 0.4:
|
|
485
|
-
dominant_dir = top_dir
|
|
486
|
-
# Dominant pack
|
|
487
|
-
pack_counter = Counter()
|
|
488
|
-
for f in member_files:
|
|
489
|
-
pk = _EXT_TO_PACK.get(_ext_of(f))
|
|
490
|
-
if pk:
|
|
491
|
-
pack_counter[pk] += 1
|
|
492
|
-
dominant_pack = ''
|
|
493
|
-
if pack_counter:
|
|
494
|
-
top_pack, top_pack_count = pack_counter.most_common(1)[0]
|
|
495
|
-
if top_pack_count / max(1, len(member_files)) >= 0.5:
|
|
496
|
-
dominant_pack = top_pack
|
|
497
|
-
cohesion = float(scores.get(cidx, 0.0)) if scores else 0.0
|
|
498
|
-
graph_communities.append({
|
|
499
|
-
'id': cidx,
|
|
500
|
-
'nodeIds': member_ids,
|
|
501
|
-
'cohesion': round(cohesion, 3),
|
|
502
|
-
'dominantSourceDir': dominant_dir,
|
|
503
|
-
'dominantPack': dominant_pack,
|
|
504
|
-
})
|
|
505
|
-
|
|
506
|
-
# Symbol index: lowercased label (without trailing ()) → list of nodeIds.
|
|
507
|
-
_symbol_index = {}
|
|
508
|
-
for node_obj in graph_nodes:
|
|
509
|
-
key = _strip_paren_suffix(node_obj['label']).lower()
|
|
510
|
-
if not key:
|
|
511
|
-
continue
|
|
512
|
-
_symbol_index.setdefault(key, []).append(node_obj['id'])
|
|
513
|
-
|
|
514
|
-
# Active-pack detection: derive from extensions seen in source files.
|
|
515
|
-
_packs_seen = sorted({_EXT_TO_PACK[e] for e in (_ext_of(_rel(d.get('source_file', '')))
|
|
516
|
-
for _, d in nodes)
|
|
517
|
-
if e in _EXT_TO_PACK})
|
|
518
|
-
|
|
519
|
-
# Size-budget enforcement. Hard cap 50MB serialized. If we exceed,
|
|
520
|
-
# drop method edges first (densest class — structural noise, doesn't
|
|
521
|
-
# affect call-graph queries).
|
|
522
|
-
import datetime as _dt
|
|
523
|
-
_meta = {
|
|
524
|
-
'tool': 'graphify',
|
|
525
|
-
'graphifyVersion': '', # filled by TS-side post-parse (read from graphifyy package version)
|
|
526
|
-
'dxkitVersion': '', # filled by TS-side post-parse (read from package.json)
|
|
527
|
-
'generatedAt': _dt.datetime.now(_dt.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
|
|
528
|
-
'sourceFilesInGraph': total_src,
|
|
529
|
-
'excludedFileCount': len(all_files) - len(files),
|
|
530
|
-
'packs': _packs_seen,
|
|
531
|
-
'truncated': False,
|
|
532
|
-
'truncatedReason': '',
|
|
533
|
-
}
|
|
534
573
|
|
|
535
|
-
_graph_payload = {
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
}
|
|
574
|
+
_graph_payload = {
|
|
575
|
+
'schemaVersion': 1,
|
|
576
|
+
'meta': _meta,
|
|
577
|
+
'nodes': graph_nodes,
|
|
578
|
+
'edges': graph_edges,
|
|
579
|
+
'communities': graph_communities,
|
|
580
|
+
'symbolIndex': _symbol_index,
|
|
581
|
+
}
|
|
543
582
|
|
|
544
|
-
# Cheap pre-check on size: serialize once, measure, drop method edges
|
|
545
|
-
# if over the cap, re-serialize. The 50MB cap matches the schema
|
|
546
|
-
# contract; 10MB soft target is informational only (no enforcement).
|
|
547
|
-
_BYTES_HARD_CAP = 50 * 1024 * 1024
|
|
548
|
-
|
|
549
|
-
def _serialize(payload):
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
_graph_json = _serialize(_graph_payload)
|
|
553
|
-
if len(_graph_json.encode('utf-8')) > _BYTES_HARD_CAP:
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
)
|
|
565
|
-
|
|
566
|
-
# Render the interactive viewer alongside graph.json so the dashboard
|
|
567
|
-
# Graph tab can embed it. graphify ships its own vis.js-based renderer
|
|
568
|
-
# (graphify.export.to_html). Two emission paths:
|
|
569
|
-
#
|
|
570
|
-
# - Full graph (G.number_of_nodes() <= MAX_NODES_FOR_VIZ = 5000):
|
|
571
|
-
# pass the original G + communities. The viewer renders every
|
|
572
|
-
# symbol; the user can zoom + drill.
|
|
573
|
-
#
|
|
574
|
-
# - Aggregated community view (G > MAX_NODES_FOR_VIZ): build a
|
|
575
|
-
# networkx super-graph whose nodes ARE the communities. Sized by
|
|
576
|
-
# member count via graphify member_counts parameter. Inter-
|
|
577
|
-
# community edges aggregated to weighted edges. This lets a
|
|
578
|
-
# customer-scale repo still get a meaningful "what does this
|
|
579
|
-
# codebase look like" viz instead of a dead empty-state.
|
|
580
|
-
#
|
|
581
|
-
# Either way failures are non-fatal: the dashboard surfaces a clear
|
|
582
|
-
# empty-state when graph.html isn't on disk.
|
|
583
|
-
try:
|
|
584
|
-
from graphify.export import to_html as _to_html, MAX_NODES_FOR_VIZ as _MAX_VIZ
|
|
585
|
-
import networkx as _nx
|
|
586
|
-
_html_dir = target / '.dxkit' / 'reports'
|
|
587
|
-
_html_dir.mkdir(parents=True, exist_ok=True)
|
|
588
|
-
_html_path = _html_dir / 'graph.html'
|
|
589
|
-
|
|
590
|
-
if G.number_of_nodes() <= _MAX_VIZ:
|
|
591
|
-
_labels = {
|
|
592
|
-
c['id']: (c.get('dominantSourceDir') or f"community-{c['id']}")
|
|
593
|
-
for c in graph_communities
|
|
594
|
-
}
|
|
595
|
-
_to_html(G, communities, str(_html_path), community_labels=_labels)
|
|
596
|
-
_viz_mode = 'full'
|
|
597
|
-
else:
|
|
598
|
-
# Aggregated community super-graph.
|
|
599
|
-
_node_to_comm = {}
|
|
600
|
-
for _cid, _members in communities.items():
|
|
601
|
-
for _nid in _members:
|
|
602
|
-
_node_to_comm[_nid] = _cid
|
|
603
|
-
|
|
604
|
-
_G_agg = _nx.DiGraph()
|
|
605
|
-
_member_counts = {}
|
|
606
|
-
_labels = {}
|
|
607
|
-
for _c in graph_communities:
|
|
608
|
-
_cid = _c['id']
|
|
609
|
-
_label = _c.get('dominantSourceDir') or f"community-{_cid}"
|
|
610
|
-
# vis.js node attrs: label drives display; file_type is
|
|
611
|
-
# surfaced in graphify's sidebar so we set a sentinel
|
|
612
|
-
# value the dashboard can grep on.
|
|
613
|
-
_G_agg.add_node(_cid, label=_label, source_file='', file_type='community')
|
|
614
|
-
_member_counts[_cid] = len(_c['nodeIds'])
|
|
615
|
-
_labels[_cid] = _label
|
|
616
|
-
|
|
617
|
-
# Cross-community edge aggregation. Counter keyed on
|
|
618
|
-
# (smaller_id, larger_id) for undirected aggregation; we then
|
|
619
|
-
# add a directed edge in one canonical direction so vis.js
|
|
620
|
-
# has a definite source/target. The viewer doesn't show
|
|
621
|
-
# arrows on these (they're community connections, not calls).
|
|
622
|
-
from collections import Counter as _CommCounter
|
|
623
|
-
_edge_w = _CommCounter()
|
|
624
|
-
for _u, _v, _ in G.edges(data=True):
|
|
625
|
-
_cu = _node_to_comm.get(_u)
|
|
626
|
-
_cv = _node_to_comm.get(_v)
|
|
627
|
-
if _cu is None or _cv is None or _cu == _cv:
|
|
628
|
-
continue
|
|
629
|
-
_key = (_cu, _cv) if _cu < _cv else (_cv, _cu)
|
|
630
|
-
_edge_w[_key] += 1
|
|
631
|
-
for (_a, _b), _w in _edge_w.items():
|
|
632
|
-
_G_agg.add_edge(_a, _b, relation='inter_community', occurrences=_w)
|
|
633
|
-
|
|
634
|
-
# to_html requires a communities dict; one-element groups
|
|
635
|
-
# treat each aggregated node as its own community so each
|
|
636
|
-
# community keeps a distinct color in graphify's palette.
|
|
637
|
-
_agg_groups = {_cid: [_cid] for _cid in communities}
|
|
638
|
-
|
|
639
|
-
_to_html(
|
|
640
|
-
_G_agg, _agg_groups, str(_html_path),
|
|
641
|
-
community_labels=_labels, member_counts=_member_counts,
|
|
583
|
+
# Cheap pre-check on size: serialize once, measure, drop method edges
|
|
584
|
+
# if over the cap, re-serialize. The 50MB cap matches the schema
|
|
585
|
+
# contract; 10MB soft target is informational only (no enforcement).
|
|
586
|
+
_BYTES_HARD_CAP = 50 * 1024 * 1024
|
|
587
|
+
|
|
588
|
+
def _serialize(payload):
|
|
589
|
+
return json.dumps(payload, separators=(',', ':'))
|
|
590
|
+
|
|
591
|
+
_graph_json = _serialize(_graph_payload)
|
|
592
|
+
if len(_graph_json.encode('utf-8')) > _BYTES_HARD_CAP:
|
|
593
|
+
# Drop method edges first; they're structural (class-owns-method),
|
|
594
|
+
# not behavioral. Call + import edges carry the actionable info.
|
|
595
|
+
pre_count = len(_graph_payload['edges'])
|
|
596
|
+
_graph_payload['edges'] = [e for e in _graph_payload['edges']
|
|
597
|
+
if e['relation'] != 'method']
|
|
598
|
+
post_count = len(_graph_payload['edges'])
|
|
599
|
+
_meta['truncated'] = True
|
|
600
|
+
_meta['truncatedReason'] = (
|
|
601
|
+
f"dropped {pre_count - post_count} method edges to fit under "
|
|
602
|
+
f"the 50MB hard cap"
|
|
642
603
|
)
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
#
|
|
646
|
-
#
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
604
|
+
|
|
605
|
+
# Render the interactive viewer alongside graph.json so the dashboard
|
|
606
|
+
# Graph tab can embed it. graphify ships its own vis.js-based renderer
|
|
607
|
+
# (graphify.export.to_html). Two emission paths:
|
|
608
|
+
#
|
|
609
|
+
# - Full graph (G.number_of_nodes() <= MAX_NODES_FOR_VIZ = 5000):
|
|
610
|
+
# pass the original G + communities. The viewer renders every
|
|
611
|
+
# symbol; the user can zoom + drill.
|
|
612
|
+
#
|
|
613
|
+
# - Aggregated community view (G > MAX_NODES_FOR_VIZ): build a
|
|
614
|
+
# networkx super-graph whose nodes ARE the communities. Sized by
|
|
615
|
+
# member count via graphify member_counts parameter. Inter-
|
|
616
|
+
# community edges aggregated to weighted edges. This lets a
|
|
617
|
+
# customer-scale repo still get a meaningful "what does this
|
|
618
|
+
# codebase look like" viz instead of a dead empty-state.
|
|
619
|
+
#
|
|
620
|
+
# Either way failures are non-fatal: the dashboard surfaces a clear
|
|
621
|
+
# empty-state when graph.html isn't on disk.
|
|
622
|
+
try:
|
|
623
|
+
from graphify.export import to_html as _to_html, MAX_NODES_FOR_VIZ as _MAX_VIZ
|
|
624
|
+
import networkx as _nx
|
|
625
|
+
_html_dir = target / '.dxkit' / 'reports'
|
|
626
|
+
_html_dir.mkdir(parents=True, exist_ok=True)
|
|
627
|
+
_html_path = _html_dir / 'graph.html'
|
|
628
|
+
|
|
629
|
+
if G.number_of_nodes() <= _MAX_VIZ:
|
|
630
|
+
_labels = {
|
|
631
|
+
c['id']: (c.get('dominantSourceDir') or f"community-{c['id']}")
|
|
632
|
+
for c in graph_communities
|
|
633
|
+
}
|
|
634
|
+
_to_html(G, communities, str(_html_path), community_labels=_labels)
|
|
635
|
+
_viz_mode = 'full'
|
|
636
|
+
else:
|
|
637
|
+
# Aggregated community super-graph.
|
|
638
|
+
_node_to_comm = {}
|
|
639
|
+
for _cid, _members in communities.items():
|
|
640
|
+
for _nid in _members:
|
|
641
|
+
_node_to_comm[_nid] = _cid
|
|
642
|
+
|
|
643
|
+
_G_agg = _nx.DiGraph()
|
|
644
|
+
_member_counts = {}
|
|
645
|
+
_labels = {}
|
|
646
|
+
for _c in graph_communities:
|
|
647
|
+
_cid = _c['id']
|
|
648
|
+
_label = _c.get('dominantSourceDir') or f"community-{_cid}"
|
|
649
|
+
# vis.js node attrs: label drives display; file_type is
|
|
650
|
+
# surfaced in graphify's sidebar so we set a sentinel
|
|
651
|
+
# value the dashboard can grep on.
|
|
652
|
+
_G_agg.add_node(_cid, label=_label, source_file='', file_type='community')
|
|
653
|
+
_member_counts[_cid] = len(_c['nodeIds'])
|
|
654
|
+
_labels[_cid] = _label
|
|
655
|
+
|
|
656
|
+
# Cross-community edge aggregation. Counter keyed on
|
|
657
|
+
# (smaller_id, larger_id) for undirected aggregation; we then
|
|
658
|
+
# add a directed edge in one canonical direction so vis.js
|
|
659
|
+
# has a definite source/target. The viewer doesn't show
|
|
660
|
+
# arrows on these (they're community connections, not calls).
|
|
661
|
+
from collections import Counter as _CommCounter
|
|
662
|
+
_edge_w = _CommCounter()
|
|
663
|
+
for _u, _v, _ in G.edges(data=True):
|
|
664
|
+
_cu = _node_to_comm.get(_u)
|
|
665
|
+
_cv = _node_to_comm.get(_v)
|
|
666
|
+
if _cu is None or _cv is None or _cu == _cv:
|
|
667
|
+
continue
|
|
668
|
+
_key = (_cu, _cv) if _cu < _cv else (_cv, _cu)
|
|
669
|
+
_edge_w[_key] += 1
|
|
670
|
+
for (_a, _b), _w in _edge_w.items():
|
|
671
|
+
_G_agg.add_edge(_a, _b, relation='inter_community', occurrences=_w)
|
|
672
|
+
|
|
673
|
+
# to_html requires a communities dict; one-element groups
|
|
674
|
+
# treat each aggregated node as its own community so each
|
|
675
|
+
# community keeps a distinct color in graphify's palette.
|
|
676
|
+
_agg_groups = {_cid: [_cid] for _cid in communities}
|
|
677
|
+
|
|
678
|
+
_to_html(
|
|
679
|
+
_G_agg, _agg_groups, str(_html_path),
|
|
680
|
+
community_labels=_labels, member_counts=_member_counts,
|
|
681
|
+
)
|
|
682
|
+
_viz_mode = 'aggregated'
|
|
683
|
+
|
|
684
|
+
# Sidecar so the dashboard renderer can label the view honestly.
|
|
685
|
+
# JSON is tiny (~120B); avoids parsing graph.json twice from TS.
|
|
686
|
+
_meta_path = _html_dir / 'graph.html.meta.json'
|
|
687
|
+
_meta_path.write_text(json.dumps({
|
|
688
|
+
'mode': _viz_mode,
|
|
689
|
+
'totalNodes': G.number_of_nodes(),
|
|
690
|
+
'totalEdges': G.number_of_edges(),
|
|
691
|
+
'communities': len(communities),
|
|
692
|
+
'aggregatedNodeCount': len(communities) if _viz_mode == 'aggregated' else None,
|
|
693
|
+
}))
|
|
694
|
+
except Exception as _html_err:
|
|
695
|
+
sys.stderr.write(f"dxkit: graph.html not generated ({_html_err})\\n")
|
|
696
|
+
|
|
697
|
+
print(json.dumps({
|
|
698
|
+
"functionCount": len(functions),
|
|
699
|
+
"classCount": len([n for n, d in modules if any(
|
|
700
|
+
data.get("relation") == "method" for _, _, data in G.edges(n, data=True)
|
|
701
|
+
)]),
|
|
702
|
+
"maxFunctionsInFile": max_file[1] if max_file else 0,
|
|
703
|
+
"maxFunctionsFilePath": str(max_file[0]) if max_file else "",
|
|
704
|
+
"godNodeCount": god_count,
|
|
705
|
+
"communityCount": len(communities),
|
|
706
|
+
"avgCohesion": round(avg_cohesion, 3),
|
|
707
|
+
"orphanModuleCount": len(orphans),
|
|
708
|
+
"deadImportCount": len(dead),
|
|
709
|
+
"commentedCodeRatio": round(commented_ratio, 3),
|
|
710
|
+
"sourceFilesInGraph": total_src,
|
|
711
|
+
"graph": _graph_payload,
|
|
654
712
|
}))
|
|
655
|
-
except Exception as _html_err:
|
|
656
|
-
sys.stderr.write(f"dxkit: graph.html not generated ({_html_err})\\n")
|
|
657
|
-
|
|
658
|
-
# Clean up temp cache
|
|
659
|
-
import shutil
|
|
660
|
-
shutil.rmtree(str(_cache_dir), ignore_errors=True)
|
|
661
|
-
|
|
662
|
-
print(json.dumps({
|
|
663
|
-
"functionCount": len(functions),
|
|
664
|
-
"classCount": len([n for n, d in modules if any(
|
|
665
|
-
data.get("relation") == "method" for _, _, data in G.edges(n, data=True)
|
|
666
|
-
)]),
|
|
667
|
-
"maxFunctionsInFile": max_file[1] if max_file else 0,
|
|
668
|
-
"maxFunctionsFilePath": str(max_file[0]) if max_file else "",
|
|
669
|
-
"godNodeCount": god_count,
|
|
670
|
-
"communityCount": len(communities),
|
|
671
|
-
"avgCohesion": round(avg_cohesion, 3),
|
|
672
|
-
"orphanModuleCount": len(orphans),
|
|
673
|
-
"deadImportCount": len(dead),
|
|
674
|
-
"commentedCodeRatio": round(commented_ratio, 3),
|
|
675
|
-
"sourceFilesInGraph": total_src,
|
|
676
|
-
"graph": _graph_payload,
|
|
677
|
-
}))
|
|
678
713
|
`;
|
|
679
714
|
}
|
|
680
715
|
/**
|
|
@@ -781,6 +816,15 @@ async function computeAndCache(cwd) {
|
|
|
781
816
|
// don't litter /tmp across runs.
|
|
782
817
|
const scriptDir = fs.mkdtempSync(path.join(os.tmpdir(), 'dxkit-graphify-'));
|
|
783
818
|
const scriptPath = path.join(scriptDir, 'run.py');
|
|
819
|
+
// graphify's on-disk AST cache is redirected here (passed to the script
|
|
820
|
+
// as argv[2] → extract(cache_root=...)), keeping it out of the target
|
|
821
|
+
// repo. It lives under scriptDir so the single `fs.rmSync(scriptDir)`
|
|
822
|
+
// below reclaims it — crucially AFTER the Python process and its atexit
|
|
823
|
+
// handlers exit. graphify flushes a stat-index via atexit
|
|
824
|
+
// (graphify/cache.py), so cleaning the cache from inside the script
|
|
825
|
+
// would be undone by that post-exit write; owning the lifecycle here is
|
|
826
|
+
// the only leak-free point.
|
|
827
|
+
const cacheDir = path.join(scriptDir, 'graphify-cache');
|
|
784
828
|
fs.writeFileSync(scriptPath, buildGraphifyScript(cwd));
|
|
785
829
|
// Spawn-with-process-group so the Python interpreter + any
|
|
786
830
|
// tree-sitter worker subprocesses it starts are all killed
|
|
@@ -793,7 +837,7 @@ async function computeAndCache(cwd) {
|
|
|
793
837
|
//
|
|
794
838
|
// runDetached captures stderr natively so the tempfile redirect
|
|
795
839
|
// pattern is no longer needed — same effect, fewer moving parts.
|
|
796
|
-
const outcome = await (0, runner_1.runDetached)(pythonCmd, [scriptPath, cwd], {
|
|
840
|
+
const outcome = await (0, runner_1.runDetached)(pythonCmd, [scriptPath, cwd, cacheDir], {
|
|
797
841
|
cwd: scriptDir,
|
|
798
842
|
timeoutMs: 300000, // 5 min — bumped from 120000 in 2.4.7 for multi-thousand-file frontend repos
|
|
799
843
|
});
|