@vyuhlabs/dxkit 2.9.3 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +170 -0
- package/README.md +9 -0
- package/dist/allowlist/annotate.d.ts +71 -0
- package/dist/allowlist/annotate.d.ts.map +1 -0
- package/dist/allowlist/annotate.js +105 -0
- package/dist/allowlist/annotate.js.map +1 -0
- package/dist/allowlist/cli.d.ts +6 -0
- package/dist/allowlist/cli.d.ts.map +1 -1
- package/dist/allowlist/cli.js +70 -37
- package/dist/allowlist/cli.js.map +1 -1
- package/dist/analyzers/dashboard/index.d.ts.map +1 -1
- package/dist/analyzers/dashboard/index.js +6 -1
- package/dist/analyzers/dashboard/index.js.map +1 -1
- package/dist/analyzers/developer/gather.d.ts +16 -0
- package/dist/analyzers/developer/gather.d.ts.map +1 -1
- package/dist/analyzers/developer/gather.js +2 -0
- package/dist/analyzers/developer/gather.js.map +1 -1
- package/dist/analyzers/developer/ownership.d.ts +86 -0
- package/dist/analyzers/developer/ownership.d.ts.map +1 -0
- package/dist/analyzers/developer/ownership.js +180 -0
- package/dist/analyzers/developer/ownership.js.map +1 -0
- package/dist/analyzers/health.d.ts.map +1 -1
- package/dist/analyzers/health.js +17 -2
- package/dist/analyzers/health.js.map +1 -1
- package/dist/analyzers/quality/detailed.d.ts +5 -1
- package/dist/analyzers/quality/detailed.d.ts.map +1 -1
- package/dist/analyzers/quality/detailed.js +30 -29
- package/dist/analyzers/quality/detailed.js.map +1 -1
- package/dist/analyzers/security/actions.d.ts.map +1 -1
- package/dist/analyzers/security/actions.js +13 -0
- package/dist/analyzers/security/actions.js.map +1 -1
- package/dist/analyzers/security/aggregator.d.ts +18 -0
- package/dist/analyzers/security/aggregator.d.ts.map +1 -1
- package/dist/analyzers/security/aggregator.js +28 -0
- package/dist/analyzers/security/aggregator.js.map +1 -1
- package/dist/analyzers/security/detailed.d.ts +7 -1
- package/dist/analyzers/security/detailed.d.ts.map +1 -1
- package/dist/analyzers/security/detailed.js +31 -15
- package/dist/analyzers/security/detailed.js.map +1 -1
- package/dist/analyzers/security/gather.d.ts.map +1 -1
- package/dist/analyzers/security/gather.js +6 -0
- package/dist/analyzers/security/gather.js.map +1 -1
- package/dist/analyzers/security/index.d.ts.map +1 -1
- package/dist/analyzers/security/index.js +81 -2
- package/dist/analyzers/security/index.js.map +1 -1
- package/dist/analyzers/security/scanner-drift.d.ts +21 -0
- package/dist/analyzers/security/scanner-drift.d.ts.map +1 -0
- package/dist/analyzers/security/scanner-drift.js +113 -0
- package/dist/analyzers/security/scanner-drift.js.map +1 -0
- package/dist/analyzers/security/shallow.d.ts.map +1 -1
- package/dist/analyzers/security/shallow.js +24 -2
- package/dist/analyzers/security/shallow.js.map +1 -1
- package/dist/analyzers/security/types.d.ts +38 -0
- package/dist/analyzers/security/types.d.ts.map +1 -1
- package/dist/analyzers/tests/detailed.d.ts +5 -1
- package/dist/analyzers/tests/detailed.d.ts.map +1 -1
- package/dist/analyzers/tests/detailed.js +27 -20
- package/dist/analyzers/tests/detailed.js.map +1 -1
- package/dist/analyzers/tools/graphify.d.ts +11 -0
- package/dist/analyzers/tools/graphify.d.ts.map +1 -1
- package/dist/analyzers/tools/graphify.js +429 -413
- package/dist/analyzers/tools/graphify.js.map +1 -1
- package/dist/analyzers/tools/grep-secrets.d.ts.map +1 -1
- package/dist/analyzers/tools/grep-secrets.js +9 -0
- package/dist/analyzers/tools/grep-secrets.js.map +1 -1
- package/dist/analyzers/tools/osv-scanner-fix.d.ts.map +1 -1
- package/dist/analyzers/tools/osv-scanner-fix.js +12 -1
- package/dist/analyzers/tools/osv-scanner-fix.js.map +1 -1
- package/dist/analyzers/tools/tool-registry.d.ts.map +1 -1
- package/dist/analyzers/tools/tool-registry.js +78 -43
- package/dist/analyzers/tools/tool-registry.js.map +1 -1
- package/dist/analyzers/tools/walk-source-files.d.ts +10 -0
- package/dist/analyzers/tools/walk-source-files.d.ts.map +1 -1
- package/dist/analyzers/tools/walk-source-files.js +14 -0
- package/dist/analyzers/tools/walk-source-files.js.map +1 -1
- package/dist/analyzers/types.d.ts +9 -0
- package/dist/analyzers/types.d.ts.map +1 -1
- package/dist/attribution/attribute.d.ts +57 -0
- package/dist/attribution/attribute.d.ts.map +1 -0
- package/dist/attribution/attribute.js +149 -0
- package/dist/attribution/attribute.js.map +1 -0
- package/dist/baseline/entry-to-located.d.ts +12 -5
- package/dist/baseline/entry-to-located.d.ts.map +1 -1
- package/dist/baseline/entry-to-located.js +21 -7
- package/dist/baseline/entry-to-located.js.map +1 -1
- package/dist/baseline/git-aware-match.d.ts +7 -5
- package/dist/baseline/git-aware-match.d.ts.map +1 -1
- package/dist/baseline/git-aware-match.js +78 -5
- package/dist/baseline/git-aware-match.js.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +53 -5
- package/dist/cli.js.map +1 -1
- package/dist/explore/context-hook.d.ts +49 -29
- package/dist/explore/context-hook.d.ts.map +1 -1
- package/dist/explore/context-hook.js +304 -29
- package/dist/explore/context-hook.js.map +1 -1
- package/dist/generator.d.ts.map +1 -1
- package/dist/generator.js +13 -7
- package/dist/generator.js.map +1 -1
- package/dist/ingest/snyk-policy.d.ts +22 -1
- package/dist/ingest/snyk-policy.d.ts.map +1 -1
- package/dist/ingest/snyk-policy.js +75 -18
- package/dist/ingest/snyk-policy.js.map +1 -1
- package/dist/languages/index.d.ts +28 -5
- package/dist/languages/index.d.ts.map +1 -1
- package/dist/languages/index.js +38 -7
- package/dist/languages/index.js.map +1 -1
- package/dist/languages/typescript.d.ts.map +1 -1
- package/dist/languages/typescript.js +19 -0
- package/dist/languages/typescript.js.map +1 -1
- package/dist/reviewers-cli.d.ts +57 -0
- package/dist/reviewers-cli.d.ts.map +1 -0
- package/dist/reviewers-cli.js +263 -0
- package/dist/reviewers-cli.js.map +1 -0
- package/dist/scoring/dimensions/security.d.ts +17 -0
- package/dist/scoring/dimensions/security.d.ts.map +1 -1
- package/dist/scoring/dimensions/security.js +12 -0
- package/dist/scoring/dimensions/security.js.map +1 -1
- package/package.json +1 -1
- package/templates/.claude/skills/dxkit-action/SKILL.md +13 -2
- package/templates/.claude/skills/dxkit-allowlist/SKILL.md +9 -0
- package/templates/.claude/skills/dxkit-onboard/SKILL.md +2 -2
- package/templates/.claude/skills/dxkit-pr/SKILL.md +22 -1
|
@@ -34,6 +34,7 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.graphifyProvider = void 0;
|
|
37
|
+
exports.buildGraphifyScript = buildGraphifyScript;
|
|
37
38
|
exports.gatherGraphifyResult = gatherGraphifyResult;
|
|
38
39
|
exports.gatherGraphifyGraph = gatherGraphifyGraph;
|
|
39
40
|
exports.buildGraphifyEnvelope = buildGraphifyEnvelope;
|
|
@@ -62,17 +63,23 @@ const tool_registry_1 = require("./tool-registry");
|
|
|
62
63
|
const exclusions_1 = require("./exclusions");
|
|
63
64
|
const paths_1 = require("./paths");
|
|
64
65
|
const types_1 = require("../../explore/types");
|
|
65
|
-
/**
|
|
66
|
+
/**
|
|
67
|
+
* Build the graphify Python script with cwd-specific exclusions baked in.
|
|
68
|
+
*
|
|
69
|
+
* Exported so the structural contract of the generated script — the
|
|
70
|
+
* `if __name__ == '__main__'` guard that keeps ProcessPoolExecutor workers
|
|
71
|
+
* from re-running extraction under spawn/forkserver (Python 3.14's Linux
|
|
72
|
+
* default), and the public `extract(cache_root=...)` cache redirect that
|
|
73
|
+
* replaced the fragile `cache_dir` monkeypatch — is unit-testable without a
|
|
74
|
+
* Python interpreter or graphify installed (mirrors `buildGraphifyEnvelope`).
|
|
75
|
+
*/
|
|
66
76
|
function buildGraphifyScript(cwd) {
|
|
67
77
|
const { dirsSet, pathsList, fileGlobsList } = (0, exclusions_1.getPythonExcludeFilter)(cwd);
|
|
68
78
|
return `# Exclusion set derived from src/analyzers/tools/exclusions.ts
|
|
69
|
-
import json, sys, os
|
|
79
|
+
import json, sys, os
|
|
70
80
|
from pathlib import Path
|
|
71
81
|
from collections import Counter
|
|
72
82
|
|
|
73
|
-
# Redirect graphify cache to /tmp so we don't pollute the target repo
|
|
74
|
-
_cache_dir = Path(tempfile.mkdtemp(prefix='dxkit-graphify-'))
|
|
75
|
-
|
|
76
83
|
try:
|
|
77
84
|
from graphify.extract import extract, collect_files
|
|
78
85
|
from graphify.build import build
|
|
@@ -82,17 +89,6 @@ except ImportError:
|
|
|
82
89
|
print(json.dumps({"error": "graphify not installed"}))
|
|
83
90
|
sys.exit(0)
|
|
84
91
|
|
|
85
|
-
# Redirect graphify's on-disk cache BEFORE any graphify function runs.
|
|
86
|
-
# collect_files() eagerly resolves cache_dir() during enumeration, so
|
|
87
|
-
# the patch has to land before the first graphify call — not after.
|
|
88
|
-
# Pre-patch, a 'graphify-out/cache/' directory was created in the
|
|
89
|
-
# customer's repo every time the analyzer touched a project.
|
|
90
|
-
import graphify.cache as _gc
|
|
91
|
-
_gc.cache_dir = lambda root=None: _cache_dir / "cache"
|
|
92
|
-
(_cache_dir / "cache").mkdir(parents=True, exist_ok=True)
|
|
93
|
-
|
|
94
|
-
target = Path(sys.argv[1])
|
|
95
|
-
|
|
96
92
|
# Three-axis exclusion. EXCLUDE_DIRS is basename-only (any path
|
|
97
93
|
# segment matching skips the file). EXCLUDE_PATHS holds multi-segment
|
|
98
94
|
# relative paths from .dxkit-ignore (e.g. 'app/modules/plugins/VendorPlugin')
|
|
@@ -274,407 +270,418 @@ def _strip_paren_suffix(label):
|
|
|
274
270
|
s = s.rsplit('.', 1)[1]
|
|
275
271
|
return s
|
|
276
272
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
#
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
#
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
#
|
|
386
|
-
#
|
|
387
|
-
#
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
#
|
|
422
|
-
#
|
|
423
|
-
#
|
|
424
|
-
|
|
425
|
-
#
|
|
426
|
-
#
|
|
427
|
-
#
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
273
|
+
if __name__ == '__main__':
|
|
274
|
+
# ProcessPoolExecutor workers re-import this module under spawn/
|
|
275
|
+
# forkserver (the Python 3.14 default on Linux); the __main__ guard
|
|
276
|
+
# keeps extraction from re-running per worker. graphify's own
|
|
277
|
+
# _extract_parallel requires this guard (it warns BrokenProcessPool
|
|
278
|
+
# and dies without it). See graphify/extract.py:_extract_parallel.
|
|
279
|
+
target = Path(sys.argv[1])
|
|
280
|
+
# graphify's on-disk cache is redirected here (the public cache_root
|
|
281
|
+
# param passed to extract() below) so it never lands in the target
|
|
282
|
+
# repo. The TS caller owns this dir's lifecycle — it lives under the
|
|
283
|
+
# ephemeral scriptDir and is removed after this process fully exits,
|
|
284
|
+
# which is the only point that survives graphify's atexit stat-index
|
|
285
|
+
# flush (graphify/cache.py registers _flush_stat_index at exit, so a
|
|
286
|
+
# Python-side rmtree here would be undone by that post-exit write).
|
|
287
|
+
_cache_dir = Path(sys.argv[2])
|
|
288
|
+
all_files = collect_files(target)
|
|
289
|
+
files = [f for f in all_files if not _is_excluded(f)]
|
|
290
|
+
if not files:
|
|
291
|
+
print(json.dumps({"error": "no files found"}))
|
|
292
|
+
sys.exit(0)
|
|
293
|
+
|
|
294
|
+
# Suppress progress output by redirecting stdout during extraction
|
|
295
|
+
import io
|
|
296
|
+
_real_stdout = sys.stdout
|
|
297
|
+
sys.stdout = io.StringIO()
|
|
298
|
+
result = extract(files, cache_root=_cache_dir)
|
|
299
|
+
sys.stdout = _real_stdout
|
|
300
|
+
G = build([result], directed=True)
|
|
301
|
+
communities = cluster(G)
|
|
302
|
+
|
|
303
|
+
# Functions vs modules
|
|
304
|
+
nodes = list(G.nodes(data=True))
|
|
305
|
+
functions = [(n, d) for n, d in nodes if "()" in d.get("label", "")]
|
|
306
|
+
modules = [(n, d) for n, d in nodes if "()" not in d.get("label", "")]
|
|
307
|
+
|
|
308
|
+
# Functions per file
|
|
309
|
+
file_funcs = Counter()
|
|
310
|
+
for n, d in functions:
|
|
311
|
+
sf = d.get("source_file", "")
|
|
312
|
+
file_funcs[sf] += 1
|
|
313
|
+
|
|
314
|
+
max_file = file_funcs.most_common(1)[0] if file_funcs else ("", 0)
|
|
315
|
+
|
|
316
|
+
# God nodes: graphifyy@0.5.0 renamed the result key "edges" → "degree".
|
|
317
|
+
gods = god_nodes(G, top_n=50)
|
|
318
|
+
god_count = sum(1 for g in gods if g["degree"] > 15)
|
|
319
|
+
|
|
320
|
+
# Cohesion
|
|
321
|
+
scores = score_all(G, communities) if communities else {}
|
|
322
|
+
avg_cohesion = sum(scores.values()) / len(scores) if scores else 0.0
|
|
323
|
+
|
|
324
|
+
# Orphan modules (no inbound imports)
|
|
325
|
+
import_targets = set()
|
|
326
|
+
for u, v, data in G.edges(data=True):
|
|
327
|
+
if data.get("relation") == "imports_from":
|
|
328
|
+
import_targets.add(v)
|
|
329
|
+
module_ids = set(n for n, d in modules)
|
|
330
|
+
orphans = module_ids - import_targets
|
|
331
|
+
|
|
332
|
+
# Dead imports (imported but never called)
|
|
333
|
+
call_targets = set()
|
|
334
|
+
for u, v, data in G.edges(data=True):
|
|
335
|
+
if data.get("relation") == "calls":
|
|
336
|
+
call_targets.add(v)
|
|
337
|
+
dead = import_targets - call_targets - module_ids
|
|
338
|
+
|
|
339
|
+
# Commented code ratio: source files with 0 function/class AST nodes
|
|
340
|
+
source_files_set = set()
|
|
341
|
+
files_with_nodes = set()
|
|
342
|
+
for n, d in nodes:
|
|
343
|
+
sf = d.get("source_file", "")
|
|
344
|
+
if sf:
|
|
345
|
+
source_files_set.add(sf)
|
|
346
|
+
if "()" in d.get("label", "") or any(
|
|
347
|
+
data.get("relation") == "method"
|
|
348
|
+
for _, _, data in G.edges(n, data=True)
|
|
349
|
+
):
|
|
350
|
+
files_with_nodes.add(sf)
|
|
351
|
+
|
|
352
|
+
total_src = len(source_files_set)
|
|
353
|
+
empty_files = total_src - len(files_with_nodes)
|
|
354
|
+
commented_ratio = empty_files / total_src if total_src > 0 else 0.0
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
# ── Build the full graph artifact ────────────────────────────────────────────
|
|
358
|
+
# 2.7 Sprint 1: emit nodes / edges / communities / symbolIndex alongside
|
|
359
|
+
# the aggregate metrics. Consumers (explore CLI, dashboard viz, future
|
|
360
|
+
# 2.8 context CLI + reachability) read this via src/explore/load.ts.
|
|
361
|
+
# Schema contract documented in tmp/2.7-graph-json-schema.md.
|
|
362
|
+
|
|
363
|
+
# Determine class membership: a module-shaped node is a CLASS if it has
|
|
364
|
+
# outbound 'method' edges to other nodes (it's the owner). A function-
|
|
365
|
+
# shaped node ("()" in label) is a METHOD if it has inbound 'method'
|
|
366
|
+
# edges from a class node; otherwise it's a free FUNCTION.
|
|
367
|
+
_class_owners = set()
|
|
368
|
+
_method_members = set()
|
|
369
|
+
for u, v, data in G.edges(data=True):
|
|
370
|
+
if data.get("relation") == "method":
|
|
371
|
+
_class_owners.add(u)
|
|
372
|
+
_method_members.add(v)
|
|
373
|
+
|
|
374
|
+
def _node_kind(nid, attrs):
|
|
375
|
+
label = attrs.get('label', '')
|
|
376
|
+
is_callable = '()' in label
|
|
377
|
+
if is_callable:
|
|
378
|
+
return 'method' if nid in _method_members else 'function'
|
|
379
|
+
return 'class' if nid in _class_owners else 'module'
|
|
380
|
+
|
|
381
|
+
# Make node sourceFile paths project-relative (graphify emits absolute
|
|
382
|
+
# paths derived from \`target = sys.argv[1]\`). Mirrors the existing
|
|
383
|
+
# maxFunctionsFilePath path-normalization at the TS layer.
|
|
384
|
+
def _rel(p):
|
|
385
|
+
if not p:
|
|
386
|
+
return ''
|
|
387
|
+
s = str(p).replace(os.sep, '/')
|
|
388
|
+
t = str(target).replace(os.sep, '/').rstrip('/')
|
|
389
|
+
if s.startswith(t + '/'):
|
|
390
|
+
return s[len(t) + 1:]
|
|
391
|
+
if s == t:
|
|
392
|
+
return ''
|
|
393
|
+
return s
|
|
394
|
+
|
|
395
|
+
# Assign stable in-run ids: n0, n1, n2, ... in extraction order. The
|
|
396
|
+
# graphify-internal id strings (long underscored slugs) work but bloat
|
|
397
|
+
# the JSON by ~20 bytes per node; the n<idx> shortening saves ~50KB on
|
|
398
|
+
# a 13k-node repo. IDs are NOT stable across runs (per schema doc).
|
|
399
|
+
_id_remap = {}
|
|
400
|
+
graph_nodes = []
|
|
401
|
+
for idx, (nid, attrs) in enumerate(nodes):
|
|
402
|
+
short_id = f'n{idx}'
|
|
403
|
+
_id_remap[nid] = short_id
|
|
404
|
+
line_no = _parse_line_no(attrs)
|
|
405
|
+
rel_source = _rel(attrs.get('source_file', ''))
|
|
406
|
+
label = attrs.get('label', '')
|
|
407
|
+
name = _strip_paren_suffix(label)
|
|
408
|
+
kind = _node_kind(nid, attrs)
|
|
409
|
+
node_obj = {
|
|
410
|
+
'id': short_id,
|
|
411
|
+
'kind': kind,
|
|
412
|
+
'label': label,
|
|
413
|
+
'sourceFile': rel_source,
|
|
414
|
+
}
|
|
415
|
+
if line_no:
|
|
416
|
+
node_obj['line'] = line_no
|
|
417
|
+
# Export detection only meaningful for symbol-bearing kinds
|
|
418
|
+
# (functions, classes, methods). Module-level "is this file
|
|
419
|
+
# exported?" isn't a useful question — exclude.
|
|
420
|
+
if kind in ('function', 'class', 'method'):
|
|
421
|
+
# Resolve to absolute path for the file-line cache (we read
|
|
422
|
+
# the raw source content; the cache key is the actual path
|
|
423
|
+
# on disk, not the project-relative form).
|
|
424
|
+
abs_source = attrs.get('source_file', '')
|
|
425
|
+
exported = _detect_exported(abs_source, line_no, name)
|
|
426
|
+
if exported is not None:
|
|
427
|
+
node_obj['exported'] = exported
|
|
428
|
+
graph_nodes.append(node_obj)
|
|
429
|
+
|
|
430
|
+
# Edges remapped to short ids. Drop self-loops and edges where either
|
|
431
|
+
# endpoint was filtered out (defensive — graphify shouldn't produce them
|
|
432
|
+
# but be tolerant). Graphify emits both 'imports' (broad form: \`import X\`)
|
|
433
|
+
# and 'imports_from' (\`from X import Y\` / \`import {Y} from X\`); both
|
|
434
|
+
# carry the same semantic for our schema ("A imports from B"). Merge
|
|
435
|
+
# both into the canonical 'imports_from' edge relation. The 'contains'
|
|
436
|
+
# and 'inherits' relations graphify also produces are intentionally
|
|
437
|
+
# dropped — 'contains' duplicates the file/symbol-membership info
|
|
438
|
+
# already encoded in nodes' sourceFile field, and 'inherits' is
|
|
439
|
+
# class-inheritance which isn't yet a first-class schema relation.
|
|
440
|
+
graph_edges = []
|
|
441
|
+
for u, v, data in G.edges(data=True):
|
|
442
|
+
if u not in _id_remap or v not in _id_remap:
|
|
443
|
+
continue
|
|
444
|
+
graphify_relation = data.get('relation', '')
|
|
445
|
+
if graphify_relation == 'calls':
|
|
446
|
+
relation = 'calls'
|
|
447
|
+
elif graphify_relation in ('imports', 'imports_from'):
|
|
448
|
+
relation = 'imports_from'
|
|
449
|
+
elif graphify_relation == 'method':
|
|
450
|
+
relation = 'method'
|
|
451
|
+
else:
|
|
452
|
+
continue
|
|
453
|
+
edge_obj = {
|
|
454
|
+
'from': _id_remap[u],
|
|
455
|
+
'to': _id_remap[v],
|
|
456
|
+
'relation': relation,
|
|
457
|
+
}
|
|
458
|
+
graph_edges.append(edge_obj)
|
|
459
|
+
|
|
460
|
+
# Communities: for each cluster compute dominantSourceDir + dominantPack.
|
|
461
|
+
# dominantSourceDir = most common ancestor directory (the longest
|
|
462
|
+
# leading-segment path that >= 40% of members share); empty string when
|
|
463
|
+
# no clear dominant. dominantPack = most common pack id among member
|
|
464
|
+
# files' extensions; empty when no dominant pack.
|
|
465
|
+
def _ancestor_dir(rel_path):
|
|
466
|
+
if not rel_path or '/' not in rel_path:
|
|
467
|
+
return ''
|
|
468
|
+
return rel_path.rsplit('/', 1)[0] + '/'
|
|
469
|
+
|
|
470
|
+
graph_communities = []
|
|
471
|
+
# Graphify's cluster() returns dict[community_id: list[node_id]].
|
|
472
|
+
# Iterate via .items(); the community_id is the actual cluster
|
|
473
|
+
# identifier (used to look up cohesion in scores), members is the
|
|
474
|
+
# node-id list.
|
|
475
|
+
_node_attrs_by_id = dict(nodes)
|
|
476
|
+
for cidx, member_list in communities.items():
|
|
477
|
+
member_ids = sorted(_id_remap.get(n, '') for n in member_list if n in _id_remap)
|
|
478
|
+
member_ids = [m for m in member_ids if m]
|
|
479
|
+
if not member_ids:
|
|
480
|
+
continue
|
|
481
|
+
# Per-member source files (project-relative)
|
|
482
|
+
member_files = []
|
|
483
|
+
for nid in member_list:
|
|
484
|
+
if nid in _id_remap:
|
|
485
|
+
sf = _rel(_node_attrs_by_id.get(nid, {}).get('source_file', ''))
|
|
486
|
+
if sf:
|
|
487
|
+
member_files.append(sf)
|
|
488
|
+
# Dominant directory: longest common ancestor that >= 40% of
|
|
489
|
+
# members share (or empty if no clear winner).
|
|
490
|
+
dir_counter = Counter(_ancestor_dir(f) for f in member_files)
|
|
491
|
+
dir_counter.pop('', None)
|
|
492
|
+
dominant_dir = ''
|
|
493
|
+
if dir_counter:
|
|
494
|
+
top_dir, top_count = dir_counter.most_common(1)[0]
|
|
495
|
+
if top_count / len(member_files) >= 0.4:
|
|
496
|
+
dominant_dir = top_dir
|
|
497
|
+
# Dominant pack
|
|
498
|
+
pack_counter = Counter()
|
|
499
|
+
for f in member_files:
|
|
500
|
+
pk = _EXT_TO_PACK.get(_ext_of(f))
|
|
501
|
+
if pk:
|
|
502
|
+
pack_counter[pk] += 1
|
|
503
|
+
dominant_pack = ''
|
|
504
|
+
if pack_counter:
|
|
505
|
+
top_pack, top_pack_count = pack_counter.most_common(1)[0]
|
|
506
|
+
if top_pack_count / max(1, len(member_files)) >= 0.5:
|
|
507
|
+
dominant_pack = top_pack
|
|
508
|
+
cohesion = float(scores.get(cidx, 0.0)) if scores else 0.0
|
|
509
|
+
graph_communities.append({
|
|
510
|
+
'id': cidx,
|
|
511
|
+
'nodeIds': member_ids,
|
|
512
|
+
'cohesion': round(cohesion, 3),
|
|
513
|
+
'dominantSourceDir': dominant_dir,
|
|
514
|
+
'dominantPack': dominant_pack,
|
|
515
|
+
})
|
|
516
|
+
|
|
517
|
+
# Symbol index: lowercased label (without trailing ()) → list of nodeIds.
|
|
518
|
+
_symbol_index = {}
|
|
519
|
+
for node_obj in graph_nodes:
|
|
520
|
+
key = _strip_paren_suffix(node_obj['label']).lower()
|
|
521
|
+
if not key:
|
|
522
|
+
continue
|
|
523
|
+
_symbol_index.setdefault(key, []).append(node_obj['id'])
|
|
524
|
+
|
|
525
|
+
# Active-pack detection: derive from extensions seen in source files.
|
|
526
|
+
_packs_seen = sorted({_EXT_TO_PACK[e] for e in (_ext_of(_rel(d.get('source_file', '')))
|
|
527
|
+
for _, d in nodes)
|
|
528
|
+
if e in _EXT_TO_PACK})
|
|
529
|
+
|
|
530
|
+
# Size-budget enforcement. Hard cap 50MB serialized. If we exceed,
|
|
531
|
+
# drop method edges first (densest class — structural noise, doesn't
|
|
532
|
+
# affect call-graph queries).
|
|
533
|
+
import datetime as _dt
|
|
534
|
+
_meta = {
|
|
535
|
+
'tool': 'graphify',
|
|
536
|
+
'graphifyVersion': '', # filled by TS-side post-parse (read from graphifyy package version)
|
|
537
|
+
'dxkitVersion': '', # filled by TS-side post-parse (read from package.json)
|
|
538
|
+
'generatedAt': _dt.datetime.now(_dt.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
|
|
539
|
+
'sourceFilesInGraph': total_src,
|
|
540
|
+
'excludedFileCount': len(all_files) - len(files),
|
|
541
|
+
'packs': _packs_seen,
|
|
542
|
+
'truncated': False,
|
|
543
|
+
'truncatedReason': '',
|
|
446
544
|
}
|
|
447
|
-
graph_edges.append(edge_obj)
|
|
448
|
-
|
|
449
|
-
# Communities: for each cluster compute dominantSourceDir + dominantPack.
|
|
450
|
-
# dominantSourceDir = most common ancestor directory (the longest
|
|
451
|
-
# leading-segment path that >= 40% of members share); empty string when
|
|
452
|
-
# no clear dominant. dominantPack = most common pack id among member
|
|
453
|
-
# files' extensions; empty when no dominant pack.
|
|
454
|
-
def _ancestor_dir(rel_path):
|
|
455
|
-
if not rel_path or '/' not in rel_path:
|
|
456
|
-
return ''
|
|
457
|
-
return rel_path.rsplit('/', 1)[0] + '/'
|
|
458
|
-
|
|
459
|
-
graph_communities = []
|
|
460
|
-
# Graphify's cluster() returns dict[community_id: list[node_id]].
|
|
461
|
-
# Iterate via .items(); the community_id is the actual cluster
|
|
462
|
-
# identifier (used to look up cohesion in scores), members is the
|
|
463
|
-
# node-id list.
|
|
464
|
-
_node_attrs_by_id = dict(nodes)
|
|
465
|
-
for cidx, member_list in communities.items():
|
|
466
|
-
member_ids = sorted(_id_remap.get(n, '') for n in member_list if n in _id_remap)
|
|
467
|
-
member_ids = [m for m in member_ids if m]
|
|
468
|
-
if not member_ids:
|
|
469
|
-
continue
|
|
470
|
-
# Per-member source files (project-relative)
|
|
471
|
-
member_files = []
|
|
472
|
-
for nid in member_list:
|
|
473
|
-
if nid in _id_remap:
|
|
474
|
-
sf = _rel(_node_attrs_by_id.get(nid, {}).get('source_file', ''))
|
|
475
|
-
if sf:
|
|
476
|
-
member_files.append(sf)
|
|
477
|
-
# Dominant directory: longest common ancestor that >= 40% of
|
|
478
|
-
# members share (or empty if no clear winner).
|
|
479
|
-
dir_counter = Counter(_ancestor_dir(f) for f in member_files)
|
|
480
|
-
dir_counter.pop('', None)
|
|
481
|
-
dominant_dir = ''
|
|
482
|
-
if dir_counter:
|
|
483
|
-
top_dir, top_count = dir_counter.most_common(1)[0]
|
|
484
|
-
if top_count / len(member_files) >= 0.4:
|
|
485
|
-
dominant_dir = top_dir
|
|
486
|
-
# Dominant pack
|
|
487
|
-
pack_counter = Counter()
|
|
488
|
-
for f in member_files:
|
|
489
|
-
pk = _EXT_TO_PACK.get(_ext_of(f))
|
|
490
|
-
if pk:
|
|
491
|
-
pack_counter[pk] += 1
|
|
492
|
-
dominant_pack = ''
|
|
493
|
-
if pack_counter:
|
|
494
|
-
top_pack, top_pack_count = pack_counter.most_common(1)[0]
|
|
495
|
-
if top_pack_count / max(1, len(member_files)) >= 0.5:
|
|
496
|
-
dominant_pack = top_pack
|
|
497
|
-
cohesion = float(scores.get(cidx, 0.0)) if scores else 0.0
|
|
498
|
-
graph_communities.append({
|
|
499
|
-
'id': cidx,
|
|
500
|
-
'nodeIds': member_ids,
|
|
501
|
-
'cohesion': round(cohesion, 3),
|
|
502
|
-
'dominantSourceDir': dominant_dir,
|
|
503
|
-
'dominantPack': dominant_pack,
|
|
504
|
-
})
|
|
505
|
-
|
|
506
|
-
# Symbol index: lowercased label (without trailing ()) → list of nodeIds.
|
|
507
|
-
_symbol_index = {}
|
|
508
|
-
for node_obj in graph_nodes:
|
|
509
|
-
key = _strip_paren_suffix(node_obj['label']).lower()
|
|
510
|
-
if not key:
|
|
511
|
-
continue
|
|
512
|
-
_symbol_index.setdefault(key, []).append(node_obj['id'])
|
|
513
|
-
|
|
514
|
-
# Active-pack detection: derive from extensions seen in source files.
|
|
515
|
-
_packs_seen = sorted({_EXT_TO_PACK[e] for e in (_ext_of(_rel(d.get('source_file', '')))
|
|
516
|
-
for _, d in nodes)
|
|
517
|
-
if e in _EXT_TO_PACK})
|
|
518
|
-
|
|
519
|
-
# Size-budget enforcement. Hard cap 50MB serialized. If we exceed,
|
|
520
|
-
# drop method edges first (densest class — structural noise, doesn't
|
|
521
|
-
# affect call-graph queries).
|
|
522
|
-
import datetime as _dt
|
|
523
|
-
_meta = {
|
|
524
|
-
'tool': 'graphify',
|
|
525
|
-
'graphifyVersion': '', # filled by TS-side post-parse (read from graphifyy package version)
|
|
526
|
-
'dxkitVersion': '', # filled by TS-side post-parse (read from package.json)
|
|
527
|
-
'generatedAt': _dt.datetime.now(_dt.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
|
|
528
|
-
'sourceFilesInGraph': total_src,
|
|
529
|
-
'excludedFileCount': len(all_files) - len(files),
|
|
530
|
-
'packs': _packs_seen,
|
|
531
|
-
'truncated': False,
|
|
532
|
-
'truncatedReason': '',
|
|
533
|
-
}
|
|
534
545
|
|
|
535
|
-
_graph_payload = {
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
}
|
|
546
|
+
_graph_payload = {
|
|
547
|
+
'schemaVersion': 1,
|
|
548
|
+
'meta': _meta,
|
|
549
|
+
'nodes': graph_nodes,
|
|
550
|
+
'edges': graph_edges,
|
|
551
|
+
'communities': graph_communities,
|
|
552
|
+
'symbolIndex': _symbol_index,
|
|
553
|
+
}
|
|
543
554
|
|
|
544
|
-
# Cheap pre-check on size: serialize once, measure, drop method edges
|
|
545
|
-
# if over the cap, re-serialize. The 50MB cap matches the schema
|
|
546
|
-
# contract; 10MB soft target is informational only (no enforcement).
|
|
547
|
-
_BYTES_HARD_CAP = 50 * 1024 * 1024
|
|
548
|
-
|
|
549
|
-
def _serialize(payload):
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
_graph_json = _serialize(_graph_payload)
|
|
553
|
-
if len(_graph_json.encode('utf-8')) > _BYTES_HARD_CAP:
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
)
|
|
565
|
-
|
|
566
|
-
# Render the interactive viewer alongside graph.json so the dashboard
|
|
567
|
-
# Graph tab can embed it. graphify ships its own vis.js-based renderer
|
|
568
|
-
# (graphify.export.to_html). Two emission paths:
|
|
569
|
-
#
|
|
570
|
-
# - Full graph (G.number_of_nodes() <= MAX_NODES_FOR_VIZ = 5000):
|
|
571
|
-
# pass the original G + communities. The viewer renders every
|
|
572
|
-
# symbol; the user can zoom + drill.
|
|
573
|
-
#
|
|
574
|
-
# - Aggregated community view (G > MAX_NODES_FOR_VIZ): build a
|
|
575
|
-
# networkx super-graph whose nodes ARE the communities. Sized by
|
|
576
|
-
# member count via graphify member_counts parameter. Inter-
|
|
577
|
-
# community edges aggregated to weighted edges. This lets a
|
|
578
|
-
# customer-scale repo still get a meaningful "what does this
|
|
579
|
-
# codebase look like" viz instead of a dead empty-state.
|
|
580
|
-
#
|
|
581
|
-
# Either way failures are non-fatal: the dashboard surfaces a clear
|
|
582
|
-
# empty-state when graph.html isn't on disk.
|
|
583
|
-
try:
|
|
584
|
-
from graphify.export import to_html as _to_html, MAX_NODES_FOR_VIZ as _MAX_VIZ
|
|
585
|
-
import networkx as _nx
|
|
586
|
-
_html_dir = target / '.dxkit' / 'reports'
|
|
587
|
-
_html_dir.mkdir(parents=True, exist_ok=True)
|
|
588
|
-
_html_path = _html_dir / 'graph.html'
|
|
589
|
-
|
|
590
|
-
if G.number_of_nodes() <= _MAX_VIZ:
|
|
591
|
-
_labels = {
|
|
592
|
-
c['id']: (c.get('dominantSourceDir') or f"community-{c['id']}")
|
|
593
|
-
for c in graph_communities
|
|
594
|
-
}
|
|
595
|
-
_to_html(G, communities, str(_html_path), community_labels=_labels)
|
|
596
|
-
_viz_mode = 'full'
|
|
597
|
-
else:
|
|
598
|
-
# Aggregated community super-graph.
|
|
599
|
-
_node_to_comm = {}
|
|
600
|
-
for _cid, _members in communities.items():
|
|
601
|
-
for _nid in _members:
|
|
602
|
-
_node_to_comm[_nid] = _cid
|
|
603
|
-
|
|
604
|
-
_G_agg = _nx.DiGraph()
|
|
605
|
-
_member_counts = {}
|
|
606
|
-
_labels = {}
|
|
607
|
-
for _c in graph_communities:
|
|
608
|
-
_cid = _c['id']
|
|
609
|
-
_label = _c.get('dominantSourceDir') or f"community-{_cid}"
|
|
610
|
-
# vis.js node attrs: label drives display; file_type is
|
|
611
|
-
# surfaced in graphify's sidebar so we set a sentinel
|
|
612
|
-
# value the dashboard can grep on.
|
|
613
|
-
_G_agg.add_node(_cid, label=_label, source_file='', file_type='community')
|
|
614
|
-
_member_counts[_cid] = len(_c['nodeIds'])
|
|
615
|
-
_labels[_cid] = _label
|
|
616
|
-
|
|
617
|
-
# Cross-community edge aggregation. Counter keyed on
|
|
618
|
-
# (smaller_id, larger_id) for undirected aggregation; we then
|
|
619
|
-
# add a directed edge in one canonical direction so vis.js
|
|
620
|
-
# has a definite source/target. The viewer doesn't show
|
|
621
|
-
# arrows on these (they're community connections, not calls).
|
|
622
|
-
from collections import Counter as _CommCounter
|
|
623
|
-
_edge_w = _CommCounter()
|
|
624
|
-
for _u, _v, _ in G.edges(data=True):
|
|
625
|
-
_cu = _node_to_comm.get(_u)
|
|
626
|
-
_cv = _node_to_comm.get(_v)
|
|
627
|
-
if _cu is None or _cv is None or _cu == _cv:
|
|
628
|
-
continue
|
|
629
|
-
_key = (_cu, _cv) if _cu < _cv else (_cv, _cu)
|
|
630
|
-
_edge_w[_key] += 1
|
|
631
|
-
for (_a, _b), _w in _edge_w.items():
|
|
632
|
-
_G_agg.add_edge(_a, _b, relation='inter_community', occurrences=_w)
|
|
633
|
-
|
|
634
|
-
# to_html requires a communities dict; one-element groups
|
|
635
|
-
# treat each aggregated node as its own community so each
|
|
636
|
-
# community keeps a distinct color in graphify's palette.
|
|
637
|
-
_agg_groups = {_cid: [_cid] for _cid in communities}
|
|
638
|
-
|
|
639
|
-
_to_html(
|
|
640
|
-
_G_agg, _agg_groups, str(_html_path),
|
|
641
|
-
community_labels=_labels, member_counts=_member_counts,
|
|
555
|
+
# Cheap pre-check on size: serialize once, measure, drop method edges
|
|
556
|
+
# if over the cap, re-serialize. The 50MB cap matches the schema
|
|
557
|
+
# contract; 10MB soft target is informational only (no enforcement).
|
|
558
|
+
_BYTES_HARD_CAP = 50 * 1024 * 1024
|
|
559
|
+
|
|
560
|
+
def _serialize(payload):
|
|
561
|
+
return json.dumps(payload, separators=(',', ':'))
|
|
562
|
+
|
|
563
|
+
_graph_json = _serialize(_graph_payload)
|
|
564
|
+
if len(_graph_json.encode('utf-8')) > _BYTES_HARD_CAP:
|
|
565
|
+
# Drop method edges first; they're structural (class-owns-method),
|
|
566
|
+
# not behavioral. Call + import edges carry the actionable info.
|
|
567
|
+
pre_count = len(_graph_payload['edges'])
|
|
568
|
+
_graph_payload['edges'] = [e for e in _graph_payload['edges']
|
|
569
|
+
if e['relation'] != 'method']
|
|
570
|
+
post_count = len(_graph_payload['edges'])
|
|
571
|
+
_meta['truncated'] = True
|
|
572
|
+
_meta['truncatedReason'] = (
|
|
573
|
+
f"dropped {pre_count - post_count} method edges to fit under "
|
|
574
|
+
f"the 50MB hard cap"
|
|
642
575
|
)
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
#
|
|
646
|
-
#
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
576
|
+
|
|
577
|
+
# Render the interactive viewer alongside graph.json so the dashboard
|
|
578
|
+
# Graph tab can embed it. graphify ships its own vis.js-based renderer
|
|
579
|
+
# (graphify.export.to_html). Two emission paths:
|
|
580
|
+
#
|
|
581
|
+
# - Full graph (G.number_of_nodes() <= MAX_NODES_FOR_VIZ = 5000):
|
|
582
|
+
# pass the original G + communities. The viewer renders every
|
|
583
|
+
# symbol; the user can zoom + drill.
|
|
584
|
+
#
|
|
585
|
+
# - Aggregated community view (G > MAX_NODES_FOR_VIZ): build a
|
|
586
|
+
# networkx super-graph whose nodes ARE the communities. Sized by
|
|
587
|
+
# member count via graphify member_counts parameter. Inter-
|
|
588
|
+
# community edges aggregated to weighted edges. This lets a
|
|
589
|
+
# customer-scale repo still get a meaningful "what does this
|
|
590
|
+
# codebase look like" viz instead of a dead empty-state.
|
|
591
|
+
#
|
|
592
|
+
# Either way failures are non-fatal: the dashboard surfaces a clear
|
|
593
|
+
# empty-state when graph.html isn't on disk.
|
|
594
|
+
try:
|
|
595
|
+
from graphify.export import to_html as _to_html, MAX_NODES_FOR_VIZ as _MAX_VIZ
|
|
596
|
+
import networkx as _nx
|
|
597
|
+
_html_dir = target / '.dxkit' / 'reports'
|
|
598
|
+
_html_dir.mkdir(parents=True, exist_ok=True)
|
|
599
|
+
_html_path = _html_dir / 'graph.html'
|
|
600
|
+
|
|
601
|
+
if G.number_of_nodes() <= _MAX_VIZ:
|
|
602
|
+
_labels = {
|
|
603
|
+
c['id']: (c.get('dominantSourceDir') or f"community-{c['id']}")
|
|
604
|
+
for c in graph_communities
|
|
605
|
+
}
|
|
606
|
+
_to_html(G, communities, str(_html_path), community_labels=_labels)
|
|
607
|
+
_viz_mode = 'full'
|
|
608
|
+
else:
|
|
609
|
+
# Aggregated community super-graph.
|
|
610
|
+
_node_to_comm = {}
|
|
611
|
+
for _cid, _members in communities.items():
|
|
612
|
+
for _nid in _members:
|
|
613
|
+
_node_to_comm[_nid] = _cid
|
|
614
|
+
|
|
615
|
+
_G_agg = _nx.DiGraph()
|
|
616
|
+
_member_counts = {}
|
|
617
|
+
_labels = {}
|
|
618
|
+
for _c in graph_communities:
|
|
619
|
+
_cid = _c['id']
|
|
620
|
+
_label = _c.get('dominantSourceDir') or f"community-{_cid}"
|
|
621
|
+
# vis.js node attrs: label drives display; file_type is
|
|
622
|
+
# surfaced in graphify's sidebar so we set a sentinel
|
|
623
|
+
# value the dashboard can grep on.
|
|
624
|
+
_G_agg.add_node(_cid, label=_label, source_file='', file_type='community')
|
|
625
|
+
_member_counts[_cid] = len(_c['nodeIds'])
|
|
626
|
+
_labels[_cid] = _label
|
|
627
|
+
|
|
628
|
+
# Cross-community edge aggregation. Counter keyed on
|
|
629
|
+
# (smaller_id, larger_id) for undirected aggregation; we then
|
|
630
|
+
# add a directed edge in one canonical direction so vis.js
|
|
631
|
+
# has a definite source/target. The viewer doesn't show
|
|
632
|
+
# arrows on these (they're community connections, not calls).
|
|
633
|
+
from collections import Counter as _CommCounter
|
|
634
|
+
_edge_w = _CommCounter()
|
|
635
|
+
for _u, _v, _ in G.edges(data=True):
|
|
636
|
+
_cu = _node_to_comm.get(_u)
|
|
637
|
+
_cv = _node_to_comm.get(_v)
|
|
638
|
+
if _cu is None or _cv is None or _cu == _cv:
|
|
639
|
+
continue
|
|
640
|
+
_key = (_cu, _cv) if _cu < _cv else (_cv, _cu)
|
|
641
|
+
_edge_w[_key] += 1
|
|
642
|
+
for (_a, _b), _w in _edge_w.items():
|
|
643
|
+
_G_agg.add_edge(_a, _b, relation='inter_community', occurrences=_w)
|
|
644
|
+
|
|
645
|
+
# to_html requires a communities dict; one-element groups
|
|
646
|
+
# treat each aggregated node as its own community so each
|
|
647
|
+
# community keeps a distinct color in graphify's palette.
|
|
648
|
+
_agg_groups = {_cid: [_cid] for _cid in communities}
|
|
649
|
+
|
|
650
|
+
_to_html(
|
|
651
|
+
_G_agg, _agg_groups, str(_html_path),
|
|
652
|
+
community_labels=_labels, member_counts=_member_counts,
|
|
653
|
+
)
|
|
654
|
+
_viz_mode = 'aggregated'
|
|
655
|
+
|
|
656
|
+
# Sidecar so the dashboard renderer can label the view honestly.
|
|
657
|
+
# JSON is tiny (~120B); avoids parsing graph.json twice from TS.
|
|
658
|
+
_meta_path = _html_dir / 'graph.html.meta.json'
|
|
659
|
+
_meta_path.write_text(json.dumps({
|
|
660
|
+
'mode': _viz_mode,
|
|
661
|
+
'totalNodes': G.number_of_nodes(),
|
|
662
|
+
'totalEdges': G.number_of_edges(),
|
|
663
|
+
'communities': len(communities),
|
|
664
|
+
'aggregatedNodeCount': len(communities) if _viz_mode == 'aggregated' else None,
|
|
665
|
+
}))
|
|
666
|
+
except Exception as _html_err:
|
|
667
|
+
sys.stderr.write(f"dxkit: graph.html not generated ({_html_err})\\n")
|
|
668
|
+
|
|
669
|
+
print(json.dumps({
|
|
670
|
+
"functionCount": len(functions),
|
|
671
|
+
"classCount": len([n for n, d in modules if any(
|
|
672
|
+
data.get("relation") == "method" for _, _, data in G.edges(n, data=True)
|
|
673
|
+
)]),
|
|
674
|
+
"maxFunctionsInFile": max_file[1] if max_file else 0,
|
|
675
|
+
"maxFunctionsFilePath": str(max_file[0]) if max_file else "",
|
|
676
|
+
"godNodeCount": god_count,
|
|
677
|
+
"communityCount": len(communities),
|
|
678
|
+
"avgCohesion": round(avg_cohesion, 3),
|
|
679
|
+
"orphanModuleCount": len(orphans),
|
|
680
|
+
"deadImportCount": len(dead),
|
|
681
|
+
"commentedCodeRatio": round(commented_ratio, 3),
|
|
682
|
+
"sourceFilesInGraph": total_src,
|
|
683
|
+
"graph": _graph_payload,
|
|
654
684
|
}))
|
|
655
|
-
except Exception as _html_err:
|
|
656
|
-
sys.stderr.write(f"dxkit: graph.html not generated ({_html_err})\\n")
|
|
657
|
-
|
|
658
|
-
# Clean up temp cache
|
|
659
|
-
import shutil
|
|
660
|
-
shutil.rmtree(str(_cache_dir), ignore_errors=True)
|
|
661
|
-
|
|
662
|
-
print(json.dumps({
|
|
663
|
-
"functionCount": len(functions),
|
|
664
|
-
"classCount": len([n for n, d in modules if any(
|
|
665
|
-
data.get("relation") == "method" for _, _, data in G.edges(n, data=True)
|
|
666
|
-
)]),
|
|
667
|
-
"maxFunctionsInFile": max_file[1] if max_file else 0,
|
|
668
|
-
"maxFunctionsFilePath": str(max_file[0]) if max_file else "",
|
|
669
|
-
"godNodeCount": god_count,
|
|
670
|
-
"communityCount": len(communities),
|
|
671
|
-
"avgCohesion": round(avg_cohesion, 3),
|
|
672
|
-
"orphanModuleCount": len(orphans),
|
|
673
|
-
"deadImportCount": len(dead),
|
|
674
|
-
"commentedCodeRatio": round(commented_ratio, 3),
|
|
675
|
-
"sourceFilesInGraph": total_src,
|
|
676
|
-
"graph": _graph_payload,
|
|
677
|
-
}))
|
|
678
685
|
`;
|
|
679
686
|
}
|
|
680
687
|
/**
|
|
@@ -781,6 +788,15 @@ async function computeAndCache(cwd) {
|
|
|
781
788
|
// don't litter /tmp across runs.
|
|
782
789
|
const scriptDir = fs.mkdtempSync(path.join(os.tmpdir(), 'dxkit-graphify-'));
|
|
783
790
|
const scriptPath = path.join(scriptDir, 'run.py');
|
|
791
|
+
// graphify's on-disk AST cache is redirected here (passed to the script
|
|
792
|
+
// as argv[2] → extract(cache_root=...)), keeping it out of the target
|
|
793
|
+
// repo. It lives under scriptDir so the single `fs.rmSync(scriptDir)`
|
|
794
|
+
// below reclaims it — crucially AFTER the Python process and its atexit
|
|
795
|
+
// handlers exit. graphify flushes a stat-index via atexit
|
|
796
|
+
// (graphify/cache.py), so cleaning the cache from inside the script
|
|
797
|
+
// would be undone by that post-exit write; owning the lifecycle here is
|
|
798
|
+
// the only leak-free point.
|
|
799
|
+
const cacheDir = path.join(scriptDir, 'graphify-cache');
|
|
784
800
|
fs.writeFileSync(scriptPath, buildGraphifyScript(cwd));
|
|
785
801
|
// Spawn-with-process-group so the Python interpreter + any
|
|
786
802
|
// tree-sitter worker subprocesses it starts are all killed
|
|
@@ -793,7 +809,7 @@ async function computeAndCache(cwd) {
|
|
|
793
809
|
//
|
|
794
810
|
// runDetached captures stderr natively so the tempfile redirect
|
|
795
811
|
// pattern is no longer needed — same effect, fewer moving parts.
|
|
796
|
-
const outcome = await (0, runner_1.runDetached)(pythonCmd, [scriptPath, cwd], {
|
|
812
|
+
const outcome = await (0, runner_1.runDetached)(pythonCmd, [scriptPath, cwd, cacheDir], {
|
|
797
813
|
cwd: scriptDir,
|
|
798
814
|
timeoutMs: 300000, // 5 min — bumped from 120000 in 2.4.7 for multi-thousand-file frontend repos
|
|
799
815
|
});
|