code-lens-cli 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
seer/repo/profile.py ADDED
@@ -0,0 +1,700 @@
1
+ """Single-repo profiler.
2
+
3
+ Two depths:
4
+
5
+ * shallow (default) — mechanical facts from pyproject.toml, on-disk layout,
6
+ vendored-skill list, CITATION.md, CHANGELOG.md, CLAUDE.md status section.
7
+ * deep — shallow + README intro, CLAUDE.md design sections, last 10
8
+ commit subjects (added in :func:`profile_deep`, separate task).
9
+
10
+ Missing optional sources degrade silently to empty fields.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import ast
16
+ import json
17
+ import re
18
+ import subprocess # noqa: S404 # nosec B404
19
+ import tomllib
20
+ import urllib.request
21
+ from pathlib import Path
22
+
23
+ import yaml
24
+
25
+ from seer.repo.detect import resolve_name
26
+ from seer.repo.manifest import read_pyproject
27
+
28
+ _WORKFLOW_NAME_RE = re.compile(r"^name:\s*(.+?)\s*$", re.MULTILINE)
29
+ _REMOTE_RE = re.compile(r"^(?:git@|https?://)([^:/]+)[:/](.+?)(?:\.git)?/?$")
30
+
31
+
32
+ _GH_TIMEOUT = 10 # seconds per gh api call
33
+
34
+
35
def _gh_api(endpoint: str) -> dict | None:
    """Run ``gh api <endpoint>`` and return the decoded JSON object, or None.

    ``None`` is returned when the ``gh`` executable cannot be launched, the
    call runs longer than ``_GH_TIMEOUT`` seconds, ``gh`` exits non-zero,
    stdout is not valid JSON, or the decoded payload is not a JSON object.
    """
    try:
        result = subprocess.run(  # noqa: S603,S607  # nosec B603 B607
            ["gh", "api", endpoint],
            capture_output=True,
            text=True,
            check=False,
            timeout=_GH_TIMEOUT,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers FileNotFoundError (gh not installed) as well as
        # PermissionError and similar "present but not launchable" failures.
        return None
    if result.returncode != 0:
        return None
    try:
        payload = json.loads(result.stdout)
    except json.JSONDecodeError:
        return None
    # The declared contract is ``dict | None`` and callers index the result by
    # key, so a list/scalar payload is treated like any other failure.
    return payload if isinstance(payload, dict) else None
55
+
56
+
57
+ def _github_state(git_remote: dict | None) -> dict | None:
58
+ """Return live GitHub repo state via ``gh api``.
59
+
60
+ Queries three endpoints: repo metadata (default branch + open issues),
61
+ latest release, and latest CI run on the default branch. Any network /
62
+ parse / missing-key failure causes a ``None`` return — callers must treat
63
+ the field as optional.
64
+ """
65
+ if git_remote is None:
66
+ return None
67
+ owner = git_remote.get("owner")
68
+ repo_name = git_remote.get("repo")
69
+ if not owner or not repo_name:
70
+ return None
71
+
72
+ slug = f"{owner}/{repo_name}"
73
+
74
+ repo_data = _gh_api(f"repos/{slug}")
75
+ if repo_data is None:
76
+ return None
77
+ try:
78
+ default_branch = repo_data["default_branch"]
79
+ open_issues = repo_data["open_issues_count"]
80
+ except KeyError:
81
+ return None
82
+
83
+ release_data = _gh_api(f"repos/{slug}/releases/latest")
84
+ latest_release: dict | None = None
85
+ if release_data is not None:
86
+ try:
87
+ latest_release = {
88
+ "tag": release_data["tag_name"],
89
+ "published_at": release_data["published_at"],
90
+ }
91
+ except KeyError:
92
+ latest_release = None
93
+
94
+ runs_data = _gh_api(f"repos/{slug}/actions/runs?branch={default_branch}&per_page=1")
95
+ ci_status: str | None = None
96
+ if runs_data is not None:
97
+ try:
98
+ runs = runs_data.get("workflow_runs") or []
99
+ if runs:
100
+ ci_status = runs[0].get("conclusion")
101
+ except (KeyError, IndexError):
102
+ ci_status = None
103
+
104
+ return {
105
+ "latest_release": latest_release,
106
+ "open_issues": open_issues,
107
+ "default_branch": default_branch,
108
+ "ci_status_on_default": ci_status,
109
+ }
110
+
111
+
112
+ def _pypi_state(pkg_name: str | None) -> dict | None:
113
+ """Return published package state from the PyPI JSON API.
114
+
115
+ Queries ``https://pypi.org/pypi/<pkg_name>/json`` and extracts the
116
+ latest version and its upload timestamp. Any network / parse / structural
117
+ failure returns ``None`` — callers must treat the field as optional.
118
+ """
119
+ if not pkg_name:
120
+ return None
121
+ url = f"https://pypi.org/pypi/{pkg_name}/json"
122
+ try:
123
+ with urllib.request.urlopen(url, timeout=5) as resp: # noqa: S310 # nosec B310
124
+ raw = resp.read()
125
+ except OSError:
126
+ return None
127
+ try:
128
+ data = json.loads(raw)
129
+ version = data["info"]["version"]
130
+ releases = data.get("releases") or {}
131
+ release_files = releases.get(version) or []
132
+ released_at: str | None = None
133
+ if release_files:
134
+ released_at = release_files[0].get("upload_time_iso_8601")
135
+ return {"latest_version": version, "released_at": released_at}
136
+ except (json.JSONDecodeError, KeyError, IndexError):
137
+ return None
138
+
139
+
140
def profile_shallow(path: Path, *, basic: bool = False) -> dict[str, object]:
    """Build the shallow profile dict for the repository rooted at ``path``.

    Manifest fields come from ``pyproject.toml`` when it exists; otherwise the
    name falls back to ``resolve_name(path)`` and the remaining manifest
    fields are empty. Every other field is produced by one of this module's
    helper functions.

    With ``basic=True`` the ``github_state`` and ``pypi_state`` fields are set
    to ``None`` and their helpers are not called.
    """
    pyproject_file = path / _PYPROJECT_TOML
    raw_pyproject: dict | None = None
    manifest: str | None
    if pyproject_file.exists():
        meta = read_pyproject(path)
        language, manifest = "python", _PYPROJECT_TOML
        try:
            raw_pyproject = tomllib.loads(pyproject_file.read_text(encoding="utf-8"))
        except (tomllib.TOMLDecodeError, OSError):
            # An unreadable or malformed file only disables ``build_test``.
            raw_pyproject = None
    else:
        meta = {
            "name": resolve_name(path),
            "version": "",
            "entry_points": {},
            "deps_runtime": [],
            "deps_dev": [],
        }
        language, manifest = "unknown", None

    package_tree = _package_tree(path)
    git_remote = _git_remote(path)
    pkg_name: str | None = meta.get("name") or None  # type: ignore[assignment]

    # Online lookups are skipped entirely in basic mode.
    if basic:
        github_state = None
        pypi_state = None
    else:
        github_state = _github_state(git_remote)
        pypi_state = _pypi_state(pkg_name)

    extra: dict[str, object] = {}
    nick = _read_culture_nick(path)
    if nick:
        extra["culture_nick"] = nick

    return {
        "path": str(path),
        "name": meta["name"],
        "version": meta["version"],
        "language": language,
        "manifest": manifest,
        "entry_points": meta["entry_points"],
        "deps_runtime": meta["deps_runtime"],
        "deps_dev": meta["deps_dev"],
        "package_layout": _list_packages(path),
        "package_tree": package_tree,
        "build_test": _build_test(raw_pyproject),
        "ci_workflows": _ci_workflows(path),
        "publish_target": _publish_target(path),
        "git_remote": git_remote,
        "module_summaries": _module_docs(path, package_tree),
        "github_state": github_state,
        "pypi_state": pypi_state,
        "vendored_skills": _list_vendored_skills(path),
        "citations": _read_citations(path),
        "changelog_recent": _read_changelog(path, n=3),
        "claude_md_status": _read_claude_md_section(path, "## Project Status"),
        "extra": extra,
    }
204
+
205
+
206
+ _PKG_EXCLUDE = {"tests", "docs", "scripts", "__pycache__"}
207
+ _INIT_PY = "__init__.py"
208
+ _PYPROJECT_TOML = "pyproject.toml"
209
+
210
+
211
+ def _is_candidate_pkg_dir(child: Path) -> bool:
212
+ """True if *child* is a non-hidden, non-excluded directory worth scanning."""
213
+ return child.is_dir() and not child.name.startswith(".") and child.name not in _PKG_EXCLUDE
214
+
215
+
216
+ def _list_packages(path: Path) -> list[str]:
217
+ """Return one-level Python packages at the repo root or under ``src/``."""
218
+ out: list[str] = []
219
+ for child in sorted(path.iterdir()):
220
+ if not _is_candidate_pkg_dir(child):
221
+ continue
222
+ if (child / _INIT_PY).exists():
223
+ out.append(child.name + "/")
224
+ src = path / "src"
225
+ if src.is_dir():
226
+ for child in sorted(src.iterdir()):
227
+ if not _is_candidate_pkg_dir(child):
228
+ continue
229
+ if (child / _INIT_PY).exists():
230
+ out.append(f"src/{child.name}/")
231
+ return out
232
+
233
+
234
+ def _package_node(pkg_dir: Path, *, remaining_depth: int) -> dict[str, object]:
235
+ """Build one tree node for *pkg_dir*; recurse into subpackages until depth exhausted."""
236
+ modules: list[str] = []
237
+ subpackages: list[dict[str, object]] = []
238
+ for child in sorted(pkg_dir.iterdir()):
239
+ if child.name.startswith(".") or child.name in _PKG_EXCLUDE:
240
+ continue
241
+ if child.is_file() and child.suffix == ".py":
242
+ modules.append(child.name)
243
+ continue
244
+ if child.is_dir() and (child / _INIT_PY).exists() and remaining_depth > 0:
245
+ subpackages.append(_package_node(child, remaining_depth=remaining_depth - 1))
246
+ return {"name": pkg_dir.name, "modules": modules, "subpackages": subpackages}
247
+
248
+
249
+ def _package_tree(path: Path, *, max_depth: int = 2) -> list[dict[str, object]]:
250
+ """Return one node per top-level package with up to ``max_depth`` levels of subpackages.
251
+
252
+ Walks the same roots as :func:`_list_packages` (repo root + ``src/``) and
253
+ honors the same exclude set, so callers that consume both the flat
254
+ ``package_layout`` and the nested ``package_tree`` see consistent contents.
255
+
256
+ ``max_depth=2`` means: top-level package (e.g. ``demo/``) plus up to two
257
+ nested levels of subpackages (e.g. ``demo/cli/`` and ``demo/cli/_commands/``).
258
+ """
259
+ out: list[dict[str, object]] = []
260
+ for child in sorted(path.iterdir()):
261
+ if not _is_candidate_pkg_dir(child):
262
+ continue
263
+ if (child / _INIT_PY).exists():
264
+ out.append(_package_node(child, remaining_depth=max_depth))
265
+ src = path / "src"
266
+ if src.is_dir():
267
+ for child in sorted(src.iterdir()):
268
+ if not _is_candidate_pkg_dir(child):
269
+ continue
270
+ if (child / _INIT_PY).exists():
271
+ out.append(_package_node(child, remaining_depth=max_depth))
272
+ return out
273
+
274
+
275
+ def _list_vendored_skills(path: Path) -> list[dict[str, str]]:
276
+ """Return ``.claude/skills/*`` entries, augmented with provenance when present."""
277
+ skills_dir = path / ".claude" / "skills"
278
+ if not skills_dir.is_dir():
279
+ return []
280
+ skills: list[dict[str, str]] = []
281
+ for skill_dir in sorted(skills_dir.iterdir()):
282
+ if skill_dir.is_dir():
283
+ skills.append(
284
+ {
285
+ "name": skill_dir.name,
286
+ "path": f".claude/skills/{skill_dir.name}/",
287
+ }
288
+ )
289
+ provenance = _read_skill_sources(path)
290
+ for skill in skills:
291
+ if skill["name"] in provenance:
292
+ skill.update(provenance[skill["name"]])
293
+ return skills
294
+
295
+
296
+ def _unwrap_backticks(val: str) -> str:
297
+ """Strip a *fully balanced* ```…``` pair from *val* and trim whitespace.
298
+
299
+ Cells with internal ```…``` spans (e.g.
300
+ ```agentculture/steward` (`.claude/skills/cicd/`)``)
301
+ are left intact so the rendered markdown stays valid.
302
+ """
303
+ v = val.strip()
304
+ if len(v) >= 2 and v.startswith("`") and v.endswith("`"):
305
+ return v[1:-1].strip()
306
+ return v
307
+
308
+
309
+ def _read_skill_sources(path: Path) -> dict[str, dict[str, str]]:
310
+ """Parse ``docs/skill-sources.md`` table rows into ``{name: {source, version}}``."""
311
+ f = path / "docs" / "skill-sources.md"
312
+ if not f.exists():
313
+ return {}
314
+ out: dict[str, dict[str, str]] = {}
315
+ for line in f.read_text(encoding="utf-8").splitlines():
316
+ s = line.strip()
317
+ if not s.startswith("|") or "---" in s:
318
+ continue
319
+ parts = [p.strip() for p in s.strip("|").split("|")]
320
+ if len(parts) >= 2 and parts[0] and parts[1] and parts[0] not in {"name", "Skill"}:
321
+ key = _unwrap_backticks(parts[0])
322
+ if key.lower() in {"name", "skill"}:
323
+ continue
324
+ out[key] = {
325
+ "source": _unwrap_backticks(parts[1]),
326
+ "version": _unwrap_backticks(parts[2]) if len(parts) >= 3 else "",
327
+ }
328
+ return out
329
+
330
+
331
+ def _read_citations(path: Path) -> list[dict[str, str]]:
332
+ """Parse ``CITATION.md`` rows into ``[{local, source_repo, sha}]``."""
333
+ f = path / "CITATION.md"
334
+ if not f.exists():
335
+ return []
336
+ out: list[dict[str, str]] = []
337
+ for line in f.read_text(encoding="utf-8").splitlines():
338
+ s = line.strip()
339
+ if not s.startswith("|") or "---" in s:
340
+ continue
341
+ parts = [p.strip() for p in s.strip("|").split("|")]
342
+ if len(parts) >= 3 and parts[0] and parts[1] and parts[2]:
343
+ first = _unwrap_backticks(parts[0]).lower()
344
+ if first.startswith("local") or first in {"path", "file"}:
345
+ continue
346
+ out.append(
347
+ {
348
+ "local": _unwrap_backticks(parts[0]),
349
+ "source_repo": _unwrap_backticks(parts[1]),
350
+ "sha": _unwrap_backticks(parts[2]),
351
+ }
352
+ )
353
+ return out
354
+
355
+
356
+ def _is_changelog_summary_line(line: str) -> bool:
357
+ """True when *line* is the first body line that should become an entry summary."""
358
+ body = line.strip()
359
+ if not body:
360
+ return False
361
+ return not body.startswith("#")
362
+
363
+
364
+ def _first_changelog_summary(body_lines: list[str]) -> str:
365
+ """Return the first viable summary line from a slice of body lines, else ``""``."""
366
+ for line in body_lines:
367
+ if _is_changelog_summary_line(line):
368
+ return line.strip().lstrip("-").strip()
369
+ return ""
370
+
371
+
372
+ def _read_changelog(path: Path, *, n: int) -> list[dict[str, str]]:
373
+ """Return up to ``n`` recent entries from ``CHANGELOG.md`` (Keep-a-Changelog).
374
+
375
+ Two-pass: collect heading indices first, then extract one summary line
376
+ per heading from the body slice between it and the next heading. This
377
+ keeps the per-function cognitive complexity small.
378
+ """
379
+ f = path / "CHANGELOG.md"
380
+ if not f.exists():
381
+ return []
382
+ lines = f.read_text(encoding="utf-8").splitlines()
383
+ heading_positions = [(i, line) for i, line in enumerate(lines) if line.startswith("## ")][:n]
384
+ entries: list[dict[str, str]] = []
385
+ for idx, (start, heading_line) in enumerate(heading_positions):
386
+ entry = _parse_changelog_heading(heading_line)
387
+ next_heading = (
388
+ heading_positions[idx + 1][0] if idx + 1 < len(heading_positions) else len(lines)
389
+ )
390
+ entry["summary"] = _first_changelog_summary(lines[start + 1 : next_heading])
391
+ entries.append(entry)
392
+ return entries
393
+
394
+
395
+ def _parse_changelog_heading(line: str) -> dict[str, str]:
396
+ """Extract version and date from a Keep-a-Changelog heading line."""
397
+ text = line[3:].strip()
398
+ if text.startswith("[") and "]" in text:
399
+ version = text[1 : text.index("]")]
400
+ rest = text[text.index("]") + 1 :].lstrip(" -")
401
+ return {"version": version, "date": rest.strip()}
402
+ parts = text.split()
403
+ version = parts[0] if parts else ""
404
+ date = parts[-1].strip("()") if len(parts) > 1 else ""
405
+ return {"version": version, "date": date}
406
+
407
+
408
+ def _read_claude_md_section(path: Path, heading: str) -> str:
409
+ """Return the body of a ``## Heading`` section from CLAUDE.md, stripped."""
410
+ f = path / "CLAUDE.md"
411
+ if not f.exists():
412
+ return ""
413
+ inside = False
414
+ out: list[str] = []
415
+ for line in f.read_text(encoding="utf-8").splitlines():
416
+ if line.strip() == heading:
417
+ inside = True
418
+ continue
419
+ if inside:
420
+ if line.startswith("## "):
421
+ break
422
+ out.append(line)
423
+ return "\n".join(out).strip()
424
+
425
+
426
+ def _read_culture_nick(path: Path) -> str:
427
+ """Return ``agents[0].suffix`` (or ``.nick``) from ``culture.yaml`` if present."""
428
+ f = path / "culture.yaml"
429
+ if not f.exists():
430
+ return ""
431
+ try:
432
+ data = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
433
+ except yaml.YAMLError:
434
+ return ""
435
+ agents = data.get("agents", [])
436
+ if not agents or not isinstance(agents[0], dict):
437
+ return ""
438
+ return str(agents[0].get("suffix") or agents[0].get("nick") or "")
439
+
440
+
441
+ def _build_test(pyproject: dict | None) -> dict | None:
442
+ """Extract test/coverage/python metadata from a raw pyproject dict.
443
+
444
+ Returns a dict with some subset of ``test_command``, ``test_addopts``,
445
+ ``coverage_fail_under``, and ``python_requires``. Keys whose value is
446
+ None are dropped. Returns None when *pyproject* is None.
447
+ """
448
+ if pyproject is None:
449
+ return None
450
+ pytest_opts = pyproject.get("tool") or {}
451
+ pytest_addopts = ((pytest_opts.get("pytest") or {}).get("ini_options") or {}).get("addopts")
452
+ coverage_fail = ((pytest_opts.get("coverage") or {}).get("report") or {}).get("fail_under")
453
+ python_requires = (pyproject.get("project") or {}).get("requires-python")
454
+ result: dict = {"test_command": "pytest"}
455
+ if pytest_addopts is not None:
456
+ result["test_addopts"] = pytest_addopts
457
+ if coverage_fail is not None:
458
+ result["coverage_fail_under"] = coverage_fail
459
+ if python_requires is not None:
460
+ result["python_requires"] = python_requires
461
+ return result
462
+
463
+
464
+ def _ci_workflows(path: Path) -> list[dict[str, str]]:
465
+ """Scan ``.github/workflows/*.{yml,yaml}`` and return name + filename entries."""
466
+ workflows_dir = path / ".github" / "workflows"
467
+ if not workflows_dir.is_dir():
468
+ return []
469
+ out: list[dict[str, str]] = []
470
+ for wf_file in sorted(workflows_dir.iterdir()):
471
+ if wf_file.suffix not in {".yml", ".yaml"}:
472
+ continue
473
+ try:
474
+ text = wf_file.read_text(encoding="utf-8")
475
+ except OSError:
476
+ continue
477
+ m = _WORKFLOW_NAME_RE.search(text)
478
+ if m:
479
+ raw_name = m.group(1).strip()
480
+ # strip enclosing quotes
481
+ if len(raw_name) >= 2 and raw_name[0] in ('"', "'") and raw_name[-1] == raw_name[0]:
482
+ raw_name = raw_name[1:-1]
483
+ name = raw_name
484
+ else:
485
+ name = ""
486
+ out.append({"file": wf_file.name, "name": name})
487
+ return out
488
+
489
+
490
+ # (needle, label) pairs tried in priority order for both block and inline forms.
491
+ _TRIGGER_NEEDLES = (
492
+ ("tags:", "push: tags"),
493
+ ("release", "release"),
494
+ ("workflow_dispatch", "workflow_dispatch"),
495
+ ("schedule", "schedule"),
496
+ ("pull_request", "pull_request"),
497
+ ("branches:", "push: branches"),
498
+ )
499
+
500
+
501
+ def _classify_trigger(haystack: str) -> str | None:
502
+ """Return the first matching trigger label for *haystack*, or ``None``."""
503
+ for needle, label in _TRIGGER_NEEDLES:
504
+ if needle in haystack:
505
+ return label
506
+ return None
507
+
508
+
509
+ def _summarize_on_block(text: str) -> str:
510
+ """Coarse classifier for the ``on:`` block in a workflow file."""
511
+ block_re = re.compile(r"^on:\s*\n((?:[ \t]+.*\n?)*)", re.MULTILINE)
512
+ m = block_re.search(text)
513
+ if m:
514
+ return _classify_trigger(m.group(0)) or "unknown"
515
+ inline_re = re.compile(r"^on:\s*(.+)$", re.MULTILINE)
516
+ im = inline_re.search(text)
517
+ if im:
518
+ val = im.group(1).strip().lower()
519
+ if "push" in val and not _classify_trigger(val):
520
+ return "push: branches"
521
+ return _classify_trigger(val) or "unknown"
522
+ return "unknown"
523
+
524
+
525
+ def _publish_target(path: Path) -> dict | None:
526
+ """Detect the first PyPI/GHCR publish workflow; return kind/workflow/trigger or None."""
527
+ workflows_dir = path / ".github" / "workflows"
528
+ if not workflows_dir.is_dir():
529
+ return None
530
+ for wf_file in sorted(workflows_dir.iterdir()):
531
+ if wf_file.suffix not in {".yml", ".yaml"}:
532
+ continue
533
+ try:
534
+ text = wf_file.read_text(encoding="utf-8")
535
+ except OSError:
536
+ continue
537
+ if "pypa/gh-action-pypi-publish" in text or "pypi.org" in text:
538
+ kind = "pypi"
539
+ elif "ghcr.io" in text:
540
+ kind = "ghcr"
541
+ else:
542
+ continue
543
+ return {
544
+ "kind": kind,
545
+ "workflow": wf_file.name,
546
+ "trigger": _summarize_on_block(text),
547
+ }
548
+ return None
549
+
550
+
551
+ def _git_remote(path: Path) -> dict | None:
552
+ """Return parsed ``origin`` remote info from git, or None on failure."""
553
+ try:
554
+ result = subprocess.run( # noqa: S603,S607 # nosec B603 B607
555
+ ["git", "remote", "get-url", "origin"],
556
+ cwd=path,
557
+ capture_output=True,
558
+ text=True,
559
+ check=False,
560
+ timeout=2,
561
+ )
562
+ except FileNotFoundError:
563
+ return None
564
+ except subprocess.TimeoutExpired:
565
+ return None
566
+ if result.returncode != 0:
567
+ return None
568
+ raw_url = result.stdout.strip()
569
+ m = _REMOTE_RE.match(raw_url)
570
+ if not m:
571
+ return {"url": raw_url, "ref": "origin"}
572
+ host = m.group(1)
573
+ path_part = m.group(2)
574
+ parts = path_part.split("/", 1)
575
+ owner = parts[0] if len(parts) >= 1 else ""
576
+ repo_name = parts[1] if len(parts) >= 2 else ""
577
+ return {"host": host, "owner": owner, "repo": repo_name, "url": raw_url, "ref": "origin"}
578
+
579
+
580
+ def _collect_module_files(node: dict, base_path: Path, pkg_path: Path) -> list[tuple[str, Path]]:
581
+ """Recursively collect (relative_path_str, abs_path) pairs from a package_tree node."""
582
+ results: list[tuple[str, Path]] = []
583
+ for mod in node.get("modules") or []:
584
+ rel = pkg_path / mod
585
+ abs_path = base_path / rel
586
+ results.append((str(rel), abs_path))
587
+ for sub in node.get("subpackages") or []:
588
+ sub_pkg_path = pkg_path / sub["name"]
589
+ results.extend(_collect_module_files(sub, base_path, sub_pkg_path))
590
+ return results
591
+
592
+
593
+ def _module_docs(path: Path, package_tree: list[dict]) -> list[dict]:
594
+ """Return first-docstring-line summaries for modules in the package tree."""
595
+ out: list[dict] = []
596
+ for node in package_tree:
597
+ pkg_root = Path(node["name"])
598
+ # Check if this package lives under src/
599
+ candidate_src = path / "src" / node["name"]
600
+ if candidate_src.is_dir():
601
+ base_path = path / "src"
602
+ else:
603
+ base_path = path
604
+ pkg_path = pkg_root
605
+ for rel_str, abs_path in _collect_module_files(node, base_path, pkg_path):
606
+ try:
607
+ source = abs_path.read_text(encoding="utf-8")
608
+ tree = ast.parse(source)
609
+ except (SyntaxError, OSError, UnicodeDecodeError):
610
+ continue
611
+ docstring = ast.get_docstring(tree)
612
+ if not docstring:
613
+ continue
614
+ first_line = docstring.strip().splitlines()[0].strip()
615
+ if not first_line:
616
+ continue
617
+ out.append({"module": rel_str, "summary": first_line[:120]})
618
+ out.sort(key=lambda x: x["module"])
619
+ return out
620
+
621
+
622
+ _DEEP_HEADINGS = ("## Project Status", "## Architecture")
623
+ _DEEP_KEYWORDS = ("invariant", "rule", "contract")
624
+
625
+
626
def profile_deep(path: Path, *, basic: bool = False) -> dict[str, object]:
    """Return the shallow profile extended with three deep-only fields.

    Adds ``readme_intro``, ``claude_md_sections`` and ``commits_recent`` (the
    last 10 commit subjects) on top of ``profile_shallow(path, basic=basic)``.
    """
    profile = profile_shallow(path, basic=basic)
    profile.update(
        readme_intro=_read_readme_intro(path),
        claude_md_sections=_read_claude_md_design_sections(path),
        commits_recent=_read_recent_commits(path, n=10),
    )
    return profile
633
+
634
+
635
+ def _read_readme_intro(path: Path) -> str:
636
+ """Return the first non-heading paragraph of ``README.md``."""
637
+ f = path / "README.md"
638
+ if not f.exists():
639
+ return ""
640
+ out: list[str] = []
641
+ saw_content = False
642
+ for line in f.read_text(encoding="utf-8").splitlines():
643
+ if line.startswith("#"):
644
+ if saw_content:
645
+ break
646
+ continue
647
+ if not line.strip():
648
+ if saw_content:
649
+ break
650
+ continue
651
+ saw_content = True
652
+ out.append(line.rstrip())
653
+ return "\n".join(out).strip()
654
+
655
+
656
+ def _read_claude_md_design_sections(path: Path) -> str:
657
+ """Return concatenated text of design-related ``## ...`` sections in CLAUDE.md."""
658
+ f = path / "CLAUDE.md"
659
+ if not f.exists():
660
+ return ""
661
+ chunks: list[str] = []
662
+ current_heading: str | None = None
663
+ current_body: list[str] = []
664
+ for line in f.read_text(encoding="utf-8").splitlines():
665
+ if line.startswith("## "):
666
+ if current_heading and _heading_is_design(current_heading):
667
+ chunks.append(current_heading + "\n" + "\n".join(current_body).rstrip())
668
+ current_heading = line.strip()
669
+ current_body = []
670
+ continue
671
+ current_body.append(line)
672
+ if current_heading and _heading_is_design(current_heading):
673
+ chunks.append(current_heading + "\n" + "\n".join(current_body).rstrip())
674
+ return "\n\n".join(chunks).strip()
675
+
676
+
677
def _heading_is_design(heading: str) -> bool:
    """Tell whether *heading* names a section that captures design intent.

    True for an exact match in ``_DEEP_HEADINGS``, or when the lower-cased
    heading contains any of the ``_DEEP_KEYWORDS`` substrings.
    """
    if heading in _DEEP_HEADINGS:
        return True
    lowered = heading.lower()
    for keyword in _DEEP_KEYWORDS:
        if keyword in lowered:
            return True
    return False
683
+
684
+
685
+ def _read_recent_commits(path: Path, *, n: int) -> list[str]:
686
+ """Return up to ``n`` recent commit subjects via ``git log`` (empty list if no git)."""
687
+ if not (path / ".git").exists():
688
+ return []
689
+ try:
690
+ result = subprocess.run( # noqa: S603,S607 # nosec B603 B607
691
+ ["git", "-C", str(path), "log", f"-{n}", "--pretty=format:%s"],
692
+ capture_output=True,
693
+ text=True,
694
+ check=False,
695
+ )
696
+ except FileNotFoundError:
697
+ return []
698
+ if result.returncode != 0:
699
+ return []
700
+ return [line for line in result.stdout.splitlines() if line.strip()]