index-graph 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- index_graph/__init__.py +16 -0
- index_graph/__main__.py +4 -0
- index_graph/classify.py +29 -0
- index_graph/cli.py +194 -0
- index_graph/config.py +124 -0
- index_graph/context/__init__.py +1 -0
- index_graph/context/pack.py +98 -0
- index_graph/gitmeta.py +47 -0
- index_graph/graph/__init__.py +4 -0
- index_graph/graph/build.py +127 -0
- index_graph/graph/edges.py +76 -0
- index_graph/graph/resolvers/__init__.py +5 -0
- index_graph/graph/resolvers/base.py +28 -0
- index_graph/graph/resolvers/javascript.py +64 -0
- index_graph/graph/resolvers/python.py +113 -0
- index_graph/graph/roles.py +65 -0
- index_graph/graph/walk.py +28 -0
- index_graph/model.py +67 -0
- index_graph/scan.py +116 -0
- index_graph/viz/__init__.py +12 -0
- index_graph/viz/charts.py +49 -0
- index_graph/viz/html.py +99 -0
- index_graph/viz/layout.py +210 -0
- index_graph/viz/manifest.py +43 -0
- index_graph/viz/mermaid.py +70 -0
- index_graph/viz/svg.py +70 -0
- index_graph/viz/theme.py +62 -0
- index_graph-1.0.0.dist-info/METADATA +148 -0
- index_graph-1.0.0.dist-info/RECORD +33 -0
- index_graph-1.0.0.dist-info/WHEEL +4 -0
- index_graph-1.0.0.dist-info/entry_points.txt +2 -0
- index_graph-1.0.0.dist-info/licenses/AUTHORS.md +1 -0
- index_graph-1.0.0.dist-info/licenses/LICENSE +21 -0
index_graph/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Compact JSON repository inventory maps for multi-repo workspaces."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .classify import classify
|
|
6
|
+
from .config import Config, Rule, default_config, load_config
|
|
7
|
+
from .model import SCHEMA_VERSION, Map, RepoRow
|
|
8
|
+
from .scan import build_map, discover_repos, write_map
|
|
9
|
+
|
|
10
|
+
# Package version; also passed to the map builder by the CLI.
__version__ = "1.0.0"

# Names re-exported from the package root.
__all__ = [
    "build_map",
    "write_map",
    "discover_repos",
    "Map",
    "RepoRow",
    "SCHEMA_VERSION",
    "Config",
    "Rule",
    "load_config",
    "default_config",
    "classify",
    "__version__",
]
|
index_graph/__main__.py
ADDED
index_graph/classify.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Pure classification: ordered glob rules, then a remote-host fallback."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from urllib.parse import urlsplit
|
|
6
|
+
|
|
7
|
+
from .config import PUBLIC_HOSTS, Config
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _remote_host(origin: str) -> str | None:
|
|
11
|
+
if not origin:
|
|
12
|
+
return None
|
|
13
|
+
if "://" not in origin and "@" in origin and ":" in origin:
|
|
14
|
+
# scp-like SSH form: git@github.com:owner/repo.git
|
|
15
|
+
return origin.split("@", 1)[1].split(":", 1)[0] or None
|
|
16
|
+
return urlsplit(origin).hostname
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def classify(path: str, is_repo: bool, origin: str, config: Config) -> str:
    """Return the class label for *path*.

    The first rule in ``config.rules`` whose regex matches *path* decides the
    label.  Without a matching rule, repositories are labelled from their
    origin host (``"local"`` when there is none, ``"public"`` for hosts in
    ``PUBLIC_HOSTS``, otherwise ``"private"``) and non-repositories are
    labelled ``"hidden"`` or ``"entry"`` depending on a leading dot in the
    final path segment.
    """
    hit = next((rule for rule in config.rules if rule.regex.match(path)), None)
    if hit is not None:
        return hit.class_

    if not is_repo:
        basename = path.rsplit("/", 1)[-1]
        if basename.startswith("."):
            return "hidden"
        return "entry"

    host = _remote_host(origin)
    if host is None:
        return "local"
    if host in PUBLIC_HOSTS:
        return "public"
    return "private"
|
index_graph/cli.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Command-line entry point: map (default) + graph + context subcommands."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import json
|
|
6
|
+
import sys
|
|
7
|
+
from dataclasses import replace
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from . import __version__
|
|
11
|
+
from .config import load_config
|
|
12
|
+
from .context.pack import closure, focus_subgraph, render_text, to_json
|
|
13
|
+
from .graph.build import build_graph
|
|
14
|
+
from .scan import build_map, discover_repos, write_map
|
|
15
|
+
|
|
16
|
+
# First-token values that `main` treats as an explicit subcommand; anything
# else is routed to the implicit `map` command.
_SUBCOMMANDS = {"map", "graph", "context", "viz"}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _add_map_args(p: argparse.ArgumentParser) -> None:
|
|
20
|
+
p.add_argument("--root", type=Path, default=Path.cwd())
|
|
21
|
+
p.add_argument("--output", type=Path, default=None)
|
|
22
|
+
p.add_argument("--json", action="store_true")
|
|
23
|
+
p.add_argument("--config", type=Path, default=None)
|
|
24
|
+
p.add_argument("--jobs", type=int, default=None)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the root parser with the map, graph, context and viz subcommands.

    The chosen subcommand name is stored on the namespace as ``cmd``.
    """
    root_parser = argparse.ArgumentParser(
        prog="index",
        description="Repository inventory maps + dependency graph + context packs.",
    )
    root_parser.add_argument(
        "--version", action="version", version=f"%(prog)s {__version__}"
    )
    commands = root_parser.add_subparsers(dest="cmd")

    map_cmd = commands.add_parser(
        "map", help="Write the repository inventory map (default)."
    )
    _add_map_args(map_cmd)

    graph_cmd = commands.add_parser(
        "graph", help="Derive the repo-level dependency graph."
    )
    graph_cmd.add_argument("--root", type=Path, default=Path.cwd())
    graph_cmd.add_argument("--json", action="store_true")

    context_cmd = commands.add_parser(
        "context", help="Render the synthesis context pack."
    )
    context_cmd.add_argument("--root", type=Path, default=Path.cwd())
    context_cmd.add_argument("--json", action="store_true")
    context_cmd.add_argument("--focus", default=None)
    context_cmd.add_argument("--audit", action="store_true")

    viz_cmd = commands.add_parser(
        "viz", help="Render the dependency graph (html/svg/mermaid)."
    )
    viz_cmd.add_argument("--root", type=Path, default=Path.cwd())
    viz_cmd.add_argument(
        "--format", choices=["html", "svg", "mermaid", "all"], default="html"
    )
    viz_cmd.add_argument("--focus", default=None)
    viz_cmd.add_argument("--no-external", action="store_true")
    viz_cmd.add_argument("--out", default=None)
    viz_cmd.add_argument("--out-dir", default=None)

    return root_parser
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _repo_paths(root: Path) -> dict[str, Path]:
    """Map each discovered repository's directory name to its path.

    Repositories sharing a directory name collapse onto one key; the one
    yielded last by ``discover_repos`` wins.
    """
    # discover_repos requires a Config.  No explicit config path is passed,
    # so load_config falls back to <root>/.index.toml or built-in defaults.
    config = load_config(None, root)
    by_name: dict[str, Path] = {}
    for repo in discover_repos(root, config):
        by_name[repo.name] = repo
    return by_name
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _cmd_map(args) -> int:
|
|
63
|
+
root = args.root.resolve()
|
|
64
|
+
if not root.is_dir():
|
|
65
|
+
raise SystemExit(f"root not found: {root}")
|
|
66
|
+
config = load_config(args.config, root)
|
|
67
|
+
if args.jobs is not None:
|
|
68
|
+
if args.jobs < 1:
|
|
69
|
+
raise SystemExit("--jobs must be a positive integer")
|
|
70
|
+
config = replace(config, jobs=args.jobs)
|
|
71
|
+
if args.json:
|
|
72
|
+
print(json.dumps(build_map(root, config, __version__).to_json(), indent=2))
|
|
73
|
+
else:
|
|
74
|
+
output = args.output.resolve() if args.output else root / "INDEX.json"
|
|
75
|
+
data = write_map(root, config, __version__, output)
|
|
76
|
+
print(f"wrote {output}")
|
|
77
|
+
print(f"repos={data.repo_count} dirty={data.dirty_count}")
|
|
78
|
+
return 0
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _cmd_graph(args) -> int:
    """Run the ``graph`` command: print the dependency graph as JSON or text."""
    repo_root = args.root.resolve()
    graph = build_graph(_repo_paths(repo_root))
    rendered = (
        json.dumps(to_json(graph), indent=2)
        if args.json
        else render_text(graph, "dependency graph")
    )
    print(rendered)
    return 0
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _cmd_context(args) -> int:
    """Run the ``context`` command and return the process exit code.

    ``--audit`` prints only the salience audit entries.  ``--focus NAME``
    restricts the pack to the projects connected to NAME; an unknown name
    prints a hint and returns 2.  Otherwise the whole graph is rendered as
    JSON or text.
    """
    graph = build_graph(_repo_paths(args.root.resolve()))

    if args.audit:
        findings = to_json(graph)["salience_audit"]
        print(f"salience-faithfulness warnings: {len(findings)}")
        for item in findings:
            print(f" [{item['kind']}] {item['node']} (in={item['in_degree']}) — {item['note']}")
        return 0

    focus = args.focus
    if not focus:
        title = "workstation context"
    else:
        known = {node.name for node in graph.repos}
        if focus not in known:
            # Offer case-insensitive substring matches as suggestions.
            candidates = sorted(n for n in known if focus.lower() in n.lower())
            hint = f" — did you mean: {', '.join(candidates)}?" if candidates else ""
            print(f"unknown project: {focus!r}" + hint)
            return 2
        graph = focus_subgraph(graph, closure(list(graph.edges), focus))
        title = f"focus={focus}"

    if args.json:
        print(json.dumps(to_json(graph), indent=2))
    else:
        print(render_text(graph, title))
    return 0
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _head_commit(root) -> str | None:
|
|
114
|
+
import subprocess
|
|
115
|
+
try:
|
|
116
|
+
out = subprocess.run(
|
|
117
|
+
["git", "-C", str(root), "rev-parse", "HEAD"],
|
|
118
|
+
capture_output=True, text=True, timeout=5,
|
|
119
|
+
)
|
|
120
|
+
return out.stdout.strip() or None
|
|
121
|
+
except Exception:
|
|
122
|
+
return None
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _cmd_viz(args) -> int:
    """Run the ``viz`` command and return the process exit code.

    For a single format the rendering goes to ``--out`` or stdout.  For
    ``--format all`` every artifact plus ``context-manifest.json`` is written
    into ``--out-dir`` (default: current directory); ``--out`` is not
    consulted in that mode.  An unknown ``--focus`` name prints a hint and
    returns 2.
    """
    from . import viz

    graph = build_graph(_repo_paths(args.root.resolve()))
    focus = args.focus
    if focus:
        known = {node.name for node in graph.repos}
        if focus not in known:
            candidates = sorted(n for n in known if focus.lower() in n.lower())
            hint = f" — did you mean: {', '.join(candidates)}?" if candidates else ""
            print(f"unknown project: {focus!r}" + hint)
            return 2
        graph = focus_subgraph(graph, closure(list(graph.edges), focus))

    pack = to_json(graph)
    with_external = not args.no_external

    def render_mermaid() -> str:
        return viz.render_mermaid(pack, include_external=with_external)

    def render_svg() -> str:
        layout = viz.build_layout(pack, include_external=with_external)
        return viz.render_svg(layout)

    def render_html() -> str:
        svg_markup = render_svg()
        charts = viz.render_charts(pack, include_external=with_external)
        return viz.render_html(pack, svg=svg_markup, charts=charts)

    if args.format != "all":
        renderers = {"svg": render_svg, "mermaid": render_mermaid, "html": render_html}
        text = renderers[args.format]()
        if args.out:
            Path(args.out).write_text(text, encoding="utf-8")
        else:
            print(text)
        return 0

    out_dir = Path(args.out_dir or ".")
    out_dir.mkdir(parents=True, exist_ok=True)

    # Render everything before touching the filesystem, then write in order.
    outputs = [
        ("mermaid", "graph.mmd", render_mermaid().encode("utf-8")),
        ("svg", "graph.svg", render_svg().encode("utf-8")),
        ("html", "graph.html", render_html().encode("utf-8")),
        ("context", "context.json", json.dumps(pack, indent=2).encode("utf-8")),
    ]
    for _kind, filename, blob in outputs:
        (out_dir / filename).write_bytes(blob)

    artifacts = {kind: (filename, blob) for kind, filename, blob in outputs}
    meta = {
        "version": __version__,
        "commit": _head_commit(args.root.resolve()),
        "root": str(args.root),
    }
    manifest = viz.render_manifest(pack, artifacts=artifacts, meta=meta)
    manifest_path = out_dir / "context-manifest.json"
    manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
    return 0
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse *argv* (default: ``sys.argv[1:]``) and dispatch to a command.

    Returns the exit code of the command that ran.
    """
    tokens = list(argv) if argv is not None else list(sys.argv[1:])

    # No leading subcommand: route top-level --version/--help to the root
    # parser; otherwise treat the invocation as the implicit `map` command
    # (preserves v0.2.0 behavior).
    has_subcommand = bool(tokens) and tokens[0] in _SUBCOMMANDS
    if not has_subcommand:
        if tokens and tokens[0] in ("--version", "-h", "--help"):
            build_parser().parse_args(tokens[:1])  # prints and exits
        tokens = ["map", *tokens]

    args = build_parser().parse_args(tokens)
    handlers = {"graph": _cmd_graph, "context": _cmd_context, "viz": _cmd_viz}
    # Anything not listed above (i.e. "map") falls through to the map command.
    return handlers.get(args.cmd, _cmd_map)(args)
|
index_graph/config.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Configuration: .index.toml parsing, neutral defaults, glob translation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import sys
|
|
8
|
+
import tomllib
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
# Built-in prune set; `Config.prune` unions it with user-supplied extras.
DEFAULT_PRUNE_DIRS = frozenset((
    ".git",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    "__pycache__",
    ".venv",
    "venv",
    "node_modules",
))

# Default value of `Config.markers` when [scan] markers is not configured.
DEFAULT_MARKERS = (
    "README.md",
    "AGENTS.md",
    "CLAUDE.md",
    "pyproject.toml",
    "package.json",
    "Cargo.toml",
    "CMakeLists.txt",
    "Makefile",
    "requirements.txt",
)

# Remote hosts whose repositories are classified as "public".
PUBLIC_HOSTS = frozenset((
    "github.com",
    "gitlab.com",
    "bitbucket.org",
    "codeberg.org",
    "git.sr.ht",
))

# Top-level config keys that do not trigger an "unknown config key" warning.
_KNOWN_TOP = frozenset(("rule", "scan", "privacy", "output"))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _default_jobs() -> int:
|
|
28
|
+
return min(32, (os.cpu_count() or 4) * 5)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def glob_to_regex(pattern: str) -> str:
    """Convert a path glob into a ``^...$``-anchored regular expression.

    ``*`` stays inside one path segment, ``**`` spans segments, and ``/**``
    makes the separator optional so that ``public/**`` also matches
    ``public``.  Every other character is matched literally.
    """
    def translate(token: re.Match) -> str:
        text = token.group(0)
        if text == "/**":
            return "(/.*)?"
        if text == "**":
            return ".*"
        if text == "*":
            return "[^/]*"
        return re.escape(text)

    # Alternatives are ordered longest-first so "/**" wins over "**" and "*";
    # DOTALL lets the catch-all branch consume newlines as well.
    body = re.sub(r"/\*\*|\*\*|\*|.", translate, pattern, flags=re.DOTALL)
    return "^" + body + "$"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(frozen=True)
class Rule:
    """A classification rule: a path glob paired with a class label."""

    # Path glob as written in the configuration.
    pattern: str
    # Label returned when the glob matches.
    class_: str
    # Compiled form of ``pattern``; derived, so excluded from init/eq/repr.
    regex: re.Pattern = field(init=False, compare=False, repr=False)

    def __post_init__(self) -> None:
        compiled = re.compile(glob_to_regex(self.pattern))
        # The dataclass is frozen; bypass its __setattr__ guard once here.
        object.__setattr__(self, "regex", compiled)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class Config:
    """Immutable scan configuration; every field has a built-in default."""

    # Ordered classification rules (from [[rule]] tables).
    rules: tuple[Rule, ...] = ()
    # Additional directory names to prune (from [scan] prune).
    extra_prune: frozenset[str] = frozenset()
    # Marker file names (from [scan] markers).
    markers: tuple[str, ...] = DEFAULT_MARKERS
    # Worker count (from [scan] jobs or the --jobs CLI override).
    jobs: int = field(default_factory=_default_jobs)
    # Class labels listed under [privacy] omit_origin_classes.
    omit_origin_classes: frozenset[str] = frozenset()
    # Value of [output] portable.
    portable: bool = True
    # Contents of the [output] annotations table.
    annotations: dict[str, Any] = field(default_factory=dict)

    @property
    def prune(self) -> frozenset[str]:
        """Return the built-in prune set combined with ``extra_prune``."""
        return DEFAULT_PRUNE_DIRS.union(self.extra_prune)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def default_config() -> Config:
    """Return a ``Config`` built entirely from the built-in defaults."""
    defaults = Config()
    return defaults
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def load_config(path: Path | None, root: Path) -> Config:
|
|
85
|
+
if path is None:
|
|
86
|
+
candidate = root / ".index.toml"
|
|
87
|
+
if not candidate.exists():
|
|
88
|
+
return default_config()
|
|
89
|
+
path = candidate
|
|
90
|
+
elif not path.exists():
|
|
91
|
+
raise SystemExit(f"config not found: {path}")
|
|
92
|
+
with path.open("rb") as handle:
|
|
93
|
+
try:
|
|
94
|
+
data = tomllib.load(handle)
|
|
95
|
+
except tomllib.TOMLDecodeError as exc:
|
|
96
|
+
raise SystemExit(f"{path}: invalid TOML: {exc}") from exc
|
|
97
|
+
return _build_config(data, path)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _build_config(data: dict[str, Any], path: Path) -> Config:
|
|
101
|
+
rules: list[Rule] = []
|
|
102
|
+
for idx, item in enumerate(data.get("rule", [])):
|
|
103
|
+
if "pattern" not in item or "class" not in item:
|
|
104
|
+
raise SystemExit(f"{path}: rule[{idx}] requires 'pattern' and 'class'")
|
|
105
|
+
rules.append(Rule(str(item["pattern"]), str(item["class"])))
|
|
106
|
+
|
|
107
|
+
scan = data.get("scan", {})
|
|
108
|
+
jobs = scan.get("jobs", _default_jobs())
|
|
109
|
+
if not isinstance(jobs, int) or jobs < 1:
|
|
110
|
+
raise SystemExit(f"{path}: [scan] jobs must be a positive integer")
|
|
111
|
+
extra_prune = frozenset(str(d) for d in scan.get("prune", []))
|
|
112
|
+
markers = tuple(scan["markers"]) if "markers" in scan else DEFAULT_MARKERS
|
|
113
|
+
|
|
114
|
+
omit = frozenset(str(c) for c in data.get("privacy", {}).get("omit_origin_classes", []))
|
|
115
|
+
|
|
116
|
+
output = data.get("output", {})
|
|
117
|
+
portable = bool(output.get("portable", True))
|
|
118
|
+
annotations = dict(output.get("annotations", {}))
|
|
119
|
+
|
|
120
|
+
for key in data:
|
|
121
|
+
if key not in _KNOWN_TOP:
|
|
122
|
+
print(f"{path}: warning: unknown config key '{key}'", file=sys.stderr)
|
|
123
|
+
|
|
124
|
+
return Config(tuple(rules), extra_prune, markers, jobs, omit, portable, annotations)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Synthesis context-pack renderer."""
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Render a DependencyGraph as the synthesis context pack (relations+roles+prose).
|
|
2
|
+
|
|
3
|
+
No editorializing: every line traces to a data field or an evidence record.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from ..graph.build import DependencyGraph, RepoNode
|
|
8
|
+
from ..graph.edges import Edge
|
|
9
|
+
from ..graph.roles import salience_audit, structural_salience
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _marker_list(node: RepoNode) -> list[str]:
|
|
13
|
+
out = []
|
|
14
|
+
if "entry" in node.markers:
|
|
15
|
+
out.append("entry")
|
|
16
|
+
if "published" in node.markers:
|
|
17
|
+
out.append("published")
|
|
18
|
+
return out
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def render_text(graph: DependencyGraph, title: str) -> str:
    """Render *graph* as a plain-text context pack headed by *title*.

    Sections, in order: roles with in/out degree, internal relations,
    external dependencies, the project inventory and, when present, the
    graph's warnings.
    """
    salience = structural_salience(list(graph.edges))
    ordered = sorted(graph.repos, key=lambda repo: repo.name)
    no_degree = {"in_degree": 0, "out_degree": 0}

    lines = [f"# Context pack: {title}", ""]

    lines.append("## Roles (project: roles — in/out degree)")
    for repo in ordered:
        role_text = ", ".join(graph.roles.get(repo.name, ())) or "(none)"
        degree = salience.get(repo.name, no_degree)
        lines.append(
            f"- {repo.name}: {role_text} — in={degree['in_degree']} out={degree['out_degree']}"
        )
    lines.append("")

    lines.append("## Relations (A -> B: signals [confidence])")
    for edge in graph.edges:
        if not edge.external:
            kinds = "+".join(sorted({signal.kind for signal in edge.signals}))
            lines.append(f"- {edge.from_repo} -> {edge.to_repo}: {kinds} [{edge.confidence}]")
    lines.append("")

    lines.append("## External dependencies (A -> name)")
    lines.extend(
        f"- {edge.from_repo} -> {edge.target_name}"
        for edge in graph.edges
        if edge.external
    )
    lines.append("")

    lines.append("## Inventory (all projects — extracted description)")
    for repo in ordered:
        ecosystems = "/".join(repo.ecosystems) or "none"
        lines.append(f"- {repo.name} [{ecosystems}]: {repo.description}")
    lines.append("")

    if graph.warnings:
        lines.append(f"## Warnings ({len(graph.warnings)})")
        lines.extend(f"- {warning}" for warning in graph.warnings)

    return "\n".join(lines)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def to_json(graph: DependencyGraph) -> dict:
    """Convert *graph* into a dict of plain, JSON-serialisable values.

    Keys: ``roles``, ``relations``, ``salience``, ``salience_audit``,
    ``repos`` and ``warnings``.
    """
    salience = structural_salience(list(graph.edges))

    # Only repositories carrying at least one entry/published marker.
    marked = {}
    for repo in graph.repos:
        flags = _marker_list(repo)
        if flags:
            marked[repo.name] = flags

    relations = []
    for edge in graph.edges:
        signals = [
            {
                "kind": signal.kind,
                "file": signal.evidence_file,
                "line": signal.evidence_line,
                "raw": signal.raw_spec,
            }
            for signal in edge.signals
        ]
        relations.append({
            "from": edge.from_repo,
            "to": edge.to_repo,
            "target_name": edge.target_name,
            "external": edge.external,
            "confidence": edge.confidence,
            "signals": signals,
        })

    roles = {repo.name: list(graph.roles.get(repo.name, ())) for repo in graph.repos}
    repos = [
        {
            "name": repo.name,
            "ecosystems": list(repo.ecosystems),
            "description": repo.description,
            "markers": sorted(repo.markers),
        }
        for repo in graph.repos
    ]
    return {
        "roles": roles,
        "relations": relations,
        "salience": salience,
        "salience_audit": salience_audit(salience, marked),
        "repos": repos,
        "warnings": list(graph.warnings),
    }
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def closure(edges: list[Edge], focus: str) -> set[str]:
    """Return every repo connected to *focus*, including *focus* itself.

    Edges are followed in both directions; external edges and edges without
    a target repo are ignored.
    """
    neighbours: dict[str, set[str]] = {}
    for edge in edges:
        if not edge.external and edge.to_repo is not None:
            neighbours.setdefault(edge.from_repo, set()).add(edge.to_repo)
            neighbours.setdefault(edge.to_repo, set()).add(edge.from_repo)

    # Expand one ring of unseen neighbours at a time until nothing new appears.
    reached = {focus}
    frontier = {focus}
    while frontier:
        frontier = {
            peer for name in frontier for peer in neighbours.get(name, ())
        } - reached
        reached |= frontier
    return reached
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def focus_subgraph(graph: DependencyGraph, keep: set[str]) -> DependencyGraph:
    """Return a copy of *graph* restricted to the repos named in *keep*.

    An edge survives when its source is kept and it is either external or
    points at a kept repo.  Warnings are carried over unchanged.
    """
    def survives(edge) -> bool:
        if edge.from_repo not in keep:
            return False
        return edge.external or edge.to_repo in keep

    kept_repos = tuple(repo for repo in graph.repos if repo.name in keep)
    kept_edges = tuple(edge for edge in graph.edges if survives(edge))
    kept_roles = {name: roles for name, roles in graph.roles.items() if name in keep}
    return DependencyGraph(kept_repos, kept_edges, kept_roles, graph.warnings)
|
index_graph/gitmeta.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Git subprocess access and always-on credential redaction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
import subprocess
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
# Userinfo of an http(s) URL: everything between the scheme and the LAST "@"
# of the authority.  Matching up to the last "@" (rather than the first)
# keeps a password that itself contains "@" from leaking its tail.
_USERINFO = re.compile(r"(?i)(https?://)[^/?#]+@")
# ``key=value`` pairs whose key names a secret.
_SECRET_QUERY = re.compile(r"(?i)\b(token|password|secret|api[_-]?key)=([^@\s]+)")


def sanitize_credentials(origin: str) -> str:
    """Return *origin* with embedded credentials replaced by ``<redacted>``.

    Redacts the userinfo part of http(s) URLs and the value of
    ``token``/``password``/``secret``/``api_key``-style parameters.  Strings
    without such content are returned unchanged.
    """
    clean = _USERINFO.sub(r"\1<redacted>@", origin)
    return _SECRET_QUERY.sub(r"\1=<redacted>", clean)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def run_git(repo: Path, args: list[str]) -> str:
    """Run ``git -C <repo> <args...>`` and return its stripped stdout.

    Best-effort: an empty string is returned when git cannot be started,
    exceeds the 20 second timeout, or exits with a non-zero status.
    """
    try:
        result = subprocess.run(
            ["git", "-C", str(repo), *args],
            text=True, capture_output=True, timeout=20, check=False,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError covers a missing or non-executable git binary; treat it
        # like any other failed invocation instead of aborting the caller.
        return ""
    return result.stdout.strip() if result.returncode == 0 else ""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def repo_metadata(repo: Path) -> dict[str, Any]:
    """Collect branch, head, origin and change counts for *repo*.

    ``branch`` and ``head`` fall back to ``"unknown"`` and ``origin`` to an
    empty string when git yields nothing; the origin is passed through
    ``sanitize_credentials``.
    """
    untracked = 0
    dirty = 0
    for entry in run_git(repo, ["status", "--porcelain=v1"]).splitlines():
        if entry.startswith("??"):
            untracked += 1
        elif entry:
            dirty += 1

    branch = run_git(repo, ["branch", "--show-current"])
    if not branch:
        # No current branch name reported: try the abbreviated HEAD ref.
        branch = run_git(repo, ["rev-parse", "--abbrev-ref", "HEAD"]) or "unknown"

    head = run_git(repo, ["rev-parse", "--short=7", "HEAD"]) or "unknown"
    raw_origin = run_git(repo, ["config", "--get", "remote.origin.url"]) or ""
    return {
        "branch": branch,
        "head": head,
        "origin": sanitize_credentials(raw_origin),
        "dirty_count": dirty,
        "untracked_count": untracked,
    }
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Assemble repo trees + resolvers into a DependencyGraph."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import configparser
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
import tomllib
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .edges import Edge, build_index, resolve_edges
|
|
12
|
+
from .walk import walk_files
|
|
13
|
+
from .resolvers import ALL_RESOLVERS
|
|
14
|
+
from .resolvers.base import RawEdge
|
|
15
|
+
from .roles import derive_roles
|
|
16
|
+
|
|
17
|
+
# Paragraph separator: a newline followed by a blank (whitespace-only) line.
_PARA = re.compile(r"\n\s*\n")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True)
class RepoNode:
    """One repository in the dependency graph (immutable)."""

    # Key of the repository in the mapping given to build_graph.
    name: str
    # String form of the repository root path.
    path: str
    # Names of the resolvers that matched this repository.
    ecosystems: tuple[str, ...]
    # Union of the names reported by the matching resolvers.
    exposed_names: frozenset[str]
    # Short description text extracted for the repository.
    description: str
    # Marker labels such as "entry" and "published".
    markers: frozenset[str]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class DependencyGraph:
    """Result of build_graph: nodes, edges, roles and warnings."""

    # One node per repository.
    repos: tuple[RepoNode, ...]
    # Resolved dependency edges.
    edges: tuple[Edge, ...]
    # Role labels keyed by repository name.
    roles: dict[str, tuple[str, ...]]
    # Messages produced while resolving edges.
    warnings: tuple[str, ...]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _description(repo_root: Path) -> str:
|
|
39
|
+
for readme in ("README.md", "README.rst", "README.txt", "readme.md"):
|
|
40
|
+
p = repo_root / readme
|
|
41
|
+
if p.is_file():
|
|
42
|
+
try:
|
|
43
|
+
text = p.read_text(encoding="utf-8").strip()
|
|
44
|
+
except OSError:
|
|
45
|
+
continue
|
|
46
|
+
for block in _PARA.split(text):
|
|
47
|
+
b = block.strip()
|
|
48
|
+
if b and not b.startswith("#") and not b.startswith("!["):
|
|
49
|
+
return " ".join(b.split())[:300]
|
|
50
|
+
pp = repo_root / "pyproject.toml"
|
|
51
|
+
if pp.is_file():
|
|
52
|
+
try:
|
|
53
|
+
d = tomllib.loads(pp.read_text(encoding="utf-8")).get("project", {})
|
|
54
|
+
if d.get("description"):
|
|
55
|
+
return str(d["description"])
|
|
56
|
+
except (tomllib.TOMLDecodeError, OSError):
|
|
57
|
+
pass
|
|
58
|
+
pj = repo_root / "package.json"
|
|
59
|
+
if pj.is_file():
|
|
60
|
+
try:
|
|
61
|
+
d = json.loads(pj.read_text(encoding="utf-8"))
|
|
62
|
+
if d.get("description"):
|
|
63
|
+
return str(d["description"])
|
|
64
|
+
except (json.JSONDecodeError, OSError):
|
|
65
|
+
pass
|
|
66
|
+
return "(no description)"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def detect_markers(repo_root: Path, exposed: set[str]) -> set[str]:
    """Return the marker labels that apply to the repository at *repo_root*.

    ``"published"`` is added when *exposed* is non-empty.  ``"entry"`` is
    added when any of these holds: ``pyproject.toml`` declares
    ``[project] scripts`` or ``entry-points``; ``setup.cfg`` has a
    ``console_scripts`` option under ``options.entry_points``;
    ``package.json`` has a truthy ``bin``; or ``walk_files`` yields a
    ``__main__.py``.  Unreadable, badly encoded or malformed metadata files
    are ignored.
    """
    mk: set[str] = set()
    if exposed:
        mk.add("published")

    pp = repo_root / "pyproject.toml"
    if pp.is_file():
        try:
            project = tomllib.loads(pp.read_text(encoding="utf-8")).get("project", {})
        except (tomllib.TOMLDecodeError, OSError, UnicodeDecodeError):
            project = {}
        # Guard against a non-table ``project`` value.
        if isinstance(project, dict) and (
            project.get("scripts") or project.get("entry-points")
        ):
            mk.add("entry")

    cfg = repo_root / "setup.cfg"
    if cfg.is_file():
        try:
            cp = configparser.ConfigParser()
            cp.read(cfg, encoding="utf-8")
            if cp.has_option("options.entry_points", "console_scripts"):
                mk.add("entry")
        except (configparser.Error, OSError, UnicodeDecodeError):
            # Decoding errors are not configparser.Error subclasses.
            pass

    pj = repo_root / "package.json"
    if pj.is_file():
        try:
            manifest = json.loads(pj.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError, UnicodeDecodeError):
            manifest = {}
        # Valid JSON need not be an object (e.g. a top-level array).
        if isinstance(manifest, dict) and manifest.get("bin"):
            mk.add("entry")

    if any(walk_files(repo_root, names=("__main__.py",))):
        mk.add("entry")
    return mk
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def build_graph(repo_paths: dict[str, Path], resolvers=ALL_RESOLVERS) -> DependencyGraph:
    """Build a ``DependencyGraph`` from a name -> root mapping.

    Repositories are processed in name order.  Each resolver that matches a
    repository contributes its ecosystem name, exposed names and raw edges;
    the raw edges are then resolved against the index of exposed names and
    roles are derived from the result.
    """
    nodes: list[RepoNode] = []
    exposed_by_repo: dict[str, set[str]] = {}
    raw_by_repo: dict[str, list[RawEdge]] = {}
    markers_by_repo: dict[str, set[str]] = {}

    for name in sorted(repo_paths):
        root = repo_paths[name]
        ecosystems: list[str] = []
        names: set[str] = set()
        raws: list[RawEdge] = []
        for resolver in resolvers:
            if not resolver.matches(root):
                continue
            ecosystems.append(resolver.name)
            names |= resolver.exposed_names(root)
            raws += resolver.raw_edges(root)

        exposed_by_repo[name] = names
        raw_by_repo[name] = raws
        found = detect_markers(root, names)
        markers_by_repo[name] = found
        nodes.append(
            RepoNode(
                name,
                str(root),
                tuple(ecosystems),
                frozenset(names),
                _description(root),
                frozenset(found),
            )
        )

    index = build_index(exposed_by_repo)
    edges, warnings = resolve_edges(raw_by_repo, index)
    roles = derive_roles(set(repo_paths), edges, markers_by_repo)
    return DependencyGraph(tuple(nodes), tuple(edges), roles, tuple(warnings))
|