code-lens-cli 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_lens_cli-0.7.0.dist-info/METADATA +36 -0
- code_lens_cli-0.7.0.dist-info/RECORD +33 -0
- code_lens_cli-0.7.0.dist-info/WHEEL +4 -0
- code_lens_cli-0.7.0.dist-info/entry_points.txt +3 -0
- code_lens_cli-0.7.0.dist-info/licenses/LICENSE +21 -0
- seer/__init__.py +34 -0
- seer/__main__.py +8 -0
- seer/cli/__init__.py +117 -0
- seer/cli/_commands/__init__.py +1 -0
- seer/cli/_commands/classify.py +40 -0
- seer/cli/_commands/explain.py +44 -0
- seer/cli/_commands/grep.py +44 -0
- seer/cli/_commands/learn.py +49 -0
- seer/cli/_commands/recent.py +52 -0
- seer/cli/_commands/whoami.py +42 -0
- seer/cli/_errors.py +59 -0
- seer/cli/_output.py +47 -0
- seer/lookup/__init__.py +25 -0
- seer/lookup/ast_scope.py +74 -0
- seer/lookup/classify.py +301 -0
- seer/lookup/grep_context.py +160 -0
- seer/lookup/recent_outline.py +304 -0
- seer/lookup/render.py +41 -0
- seer/repo/__init__.py +9 -0
- seer/repo/__main__.py +228 -0
- seer/repo/config.py +57 -0
- seer/repo/connections.py +298 -0
- seer/repo/detect.py +86 -0
- seer/repo/errors.py +81 -0
- seer/repo/graph.py +182 -0
- seer/repo/manifest.py +36 -0
- seer/repo/profile.py +700 -0
- seer/repo/render.py +470 -0
seer/repo/profile.py
ADDED
|
@@ -0,0 +1,700 @@
|
|
|
1
|
+
"""Single-repo profiler.
|
|
2
|
+
|
|
3
|
+
Two depths:
|
|
4
|
+
|
|
5
|
+
* shallow (default) — mechanical facts from pyproject.toml, on-disk layout,
|
|
6
|
+
vendored-skill list, CITATION.md, CHANGELOG.md, CLAUDE.md status section.
|
|
7
|
+
* deep — shallow + README intro, CLAUDE.md design sections, last 10
|
|
8
|
+
commit subjects (added in :func:`profile_deep`, separate task).
|
|
9
|
+
|
|
10
|
+
Missing optional sources degrade silently to empty fields.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import ast
|
|
16
|
+
import json
|
|
17
|
+
import re
|
|
18
|
+
import subprocess # noqa: S404 # nosec B404
|
|
19
|
+
import tomllib
|
|
20
|
+
import urllib.request
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
import yaml
|
|
24
|
+
|
|
25
|
+
from seer.repo.detect import resolve_name
|
|
26
|
+
from seer.repo.manifest import read_pyproject
|
|
27
|
+
|
|
28
|
+
# Matches a line that starts with ``name:`` (MULTILINE, so any line of the
# text) and captures its value without surrounding whitespace. Used by
# ``_ci_workflows`` to pull a workflow's display name out of raw YAML text.
_WORKFLOW_NAME_RE = re.compile(r"^name:\s*(.+?)\s*$", re.MULTILINE)
|
|
29
|
+
# Splits a remote URL of the form ``git@host:path`` or ``http(s)://host/path``
# into host (group 1) and path (group 2); a trailing ``.git`` and/or ``/`` is
# not part of the captured path.
_REMOTE_RE = re.compile(r"^(?:git@|https?://)([^:/]+)[:/](.+?)(?:\.git)?/?$")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Passed as ``timeout=`` to the ``gh api`` subprocess in ``_gh_api``.
_GH_TIMEOUT = 10 # seconds per gh api call
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _gh_api(endpoint: str) -> dict | None:
    """Run ``gh api <endpoint>`` and return the parsed JSON object, or None on any failure.

    A failure is any of: ``gh`` cannot be started (missing or not
    executable), the call exceeds ``_GH_TIMEOUT`` seconds, ``gh`` exits
    non-zero, stdout is not valid JSON, or the JSON top level is not an
    object. Callers index the result by key, so a list or scalar payload is
    reported as a failure rather than handed back.
    """
    try:
        result = subprocess.run(  # noqa: S603,S607  # nosec B603 B607
            ["gh", "api", endpoint],
            capture_output=True,
            text=True,
            check=False,
            timeout=_GH_TIMEOUT,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers FileNotFoundError (gh not installed) and
        # PermissionError (gh present but not executable).
        return None
    if result.returncode != 0:
        return None
    try:
        payload = json.loads(result.stdout)
    except json.JSONDecodeError:
        return None
    return payload if isinstance(payload, dict) else None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _github_state(git_remote: dict | None) -> dict | None:
|
|
58
|
+
"""Return live GitHub repo state via ``gh api``.
|
|
59
|
+
|
|
60
|
+
Queries three endpoints: repo metadata (default branch + open issues),
|
|
61
|
+
latest release, and latest CI run on the default branch. Any network /
|
|
62
|
+
parse / missing-key failure causes a ``None`` return — callers must treat
|
|
63
|
+
the field as optional.
|
|
64
|
+
"""
|
|
65
|
+
if git_remote is None:
|
|
66
|
+
return None
|
|
67
|
+
owner = git_remote.get("owner")
|
|
68
|
+
repo_name = git_remote.get("repo")
|
|
69
|
+
if not owner or not repo_name:
|
|
70
|
+
return None
|
|
71
|
+
|
|
72
|
+
slug = f"{owner}/{repo_name}"
|
|
73
|
+
|
|
74
|
+
repo_data = _gh_api(f"repos/{slug}")
|
|
75
|
+
if repo_data is None:
|
|
76
|
+
return None
|
|
77
|
+
try:
|
|
78
|
+
default_branch = repo_data["default_branch"]
|
|
79
|
+
open_issues = repo_data["open_issues_count"]
|
|
80
|
+
except KeyError:
|
|
81
|
+
return None
|
|
82
|
+
|
|
83
|
+
release_data = _gh_api(f"repos/{slug}/releases/latest")
|
|
84
|
+
latest_release: dict | None = None
|
|
85
|
+
if release_data is not None:
|
|
86
|
+
try:
|
|
87
|
+
latest_release = {
|
|
88
|
+
"tag": release_data["tag_name"],
|
|
89
|
+
"published_at": release_data["published_at"],
|
|
90
|
+
}
|
|
91
|
+
except KeyError:
|
|
92
|
+
latest_release = None
|
|
93
|
+
|
|
94
|
+
runs_data = _gh_api(f"repos/{slug}/actions/runs?branch={default_branch}&per_page=1")
|
|
95
|
+
ci_status: str | None = None
|
|
96
|
+
if runs_data is not None:
|
|
97
|
+
try:
|
|
98
|
+
runs = runs_data.get("workflow_runs") or []
|
|
99
|
+
if runs:
|
|
100
|
+
ci_status = runs[0].get("conclusion")
|
|
101
|
+
except (KeyError, IndexError):
|
|
102
|
+
ci_status = None
|
|
103
|
+
|
|
104
|
+
return {
|
|
105
|
+
"latest_release": latest_release,
|
|
106
|
+
"open_issues": open_issues,
|
|
107
|
+
"default_branch": default_branch,
|
|
108
|
+
"ci_status_on_default": ci_status,
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _pypi_state(pkg_name: str | None) -> dict | None:
|
|
113
|
+
"""Return published package state from the PyPI JSON API.
|
|
114
|
+
|
|
115
|
+
Queries ``https://pypi.org/pypi/<pkg_name>/json`` and extracts the
|
|
116
|
+
latest version and its upload timestamp. Any network / parse / structural
|
|
117
|
+
failure returns ``None`` — callers must treat the field as optional.
|
|
118
|
+
"""
|
|
119
|
+
if not pkg_name:
|
|
120
|
+
return None
|
|
121
|
+
url = f"https://pypi.org/pypi/{pkg_name}/json"
|
|
122
|
+
try:
|
|
123
|
+
with urllib.request.urlopen(url, timeout=5) as resp: # noqa: S310 # nosec B310
|
|
124
|
+
raw = resp.read()
|
|
125
|
+
except OSError:
|
|
126
|
+
return None
|
|
127
|
+
try:
|
|
128
|
+
data = json.loads(raw)
|
|
129
|
+
version = data["info"]["version"]
|
|
130
|
+
releases = data.get("releases") or {}
|
|
131
|
+
release_files = releases.get(version) or []
|
|
132
|
+
released_at: str | None = None
|
|
133
|
+
if release_files:
|
|
134
|
+
released_at = release_files[0].get("upload_time_iso_8601")
|
|
135
|
+
return {"latest_version": version, "released_at": released_at}
|
|
136
|
+
except (json.JSONDecodeError, KeyError, IndexError):
|
|
137
|
+
return None
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def profile_shallow(path: Path, *, basic: bool = False) -> dict[str, object]:
    """Build the shallow profile dict for the repo rooted at ``path``.

    Manifest facts come from ``pyproject.toml`` when it exists; otherwise the
    name falls back to ``resolve_name(path)``, the remaining manifest fields
    are empty, and the language is reported as ``"unknown"``. The other
    fields are gathered by the module's helper functions.

    With ``basic=True`` the online fields ``github_state`` and ``pypi_state``
    are set to ``None`` without calling their lookups.
    """
    pyproject_file = path / _PYPROJECT_TOML
    raw_pyproject: dict | None = None
    manifest: str | None
    if pyproject_file.exists():
        meta = read_pyproject(path)
        language, manifest = "python", _PYPROJECT_TOML
        # Second, raw parse of the same file: feeds ``_build_test``.
        try:
            raw_pyproject = tomllib.loads(pyproject_file.read_text(encoding="utf-8"))
        except (tomllib.TOMLDecodeError, OSError):
            raw_pyproject = None
    else:
        meta = {
            "name": resolve_name(path),
            "version": "",
            "entry_points": {},
            "deps_runtime": [],
            "deps_dev": [],
        }
        language, manifest = "unknown", None

    tree = _package_tree(path)
    remote = _git_remote(path)
    pkg_name: str | None = meta.get("name") or None  # type: ignore[assignment]
    extras: dict[str, object] = {}

    profile: dict[str, object] = {
        "path": str(path),
        "name": meta["name"],
        "version": meta["version"],
        "language": language,
        "manifest": manifest,
        "entry_points": meta["entry_points"],
        "deps_runtime": meta["deps_runtime"],
        "deps_dev": meta["deps_dev"],
        "package_layout": _list_packages(path),
        "package_tree": tree,
        "build_test": _build_test(raw_pyproject),
        "ci_workflows": _ci_workflows(path),
        "publish_target": _publish_target(path),
        "git_remote": remote,
        "module_summaries": _module_docs(path, tree),
        "github_state": None if basic else _github_state(remote),
        "pypi_state": None if basic else _pypi_state(pkg_name),
        "vendored_skills": _list_vendored_skills(path),
        "citations": _read_citations(path),
        "changelog_recent": _read_changelog(path, n=3),
        "claude_md_status": _read_claude_md_section(path, "## Project Status"),
        "extra": extras,
    }

    # ``extras`` is the same object stored under "extra", so this is visible
    # through the profile.
    nick = _read_culture_nick(path)
    if nick:
        extras["culture_nick"] = nick
    return profile
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# Entry names skipped by the package scanners (``_is_candidate_pkg_dir`` and
# ``_package_node``).
_PKG_EXCLUDE = {"tests", "docs", "scripts", "__pycache__"}
|
|
207
|
+
# Marker file: a directory is treated as a Python package only if it contains this.
_INIT_PY = "__init__.py"
|
|
208
|
+
# Manifest filename that ``profile_shallow`` looks for directly under the repo path.
_PYPROJECT_TOML = "pyproject.toml"
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _is_candidate_pkg_dir(child: Path) -> bool:
|
|
212
|
+
"""True if *child* is a non-hidden, non-excluded directory worth scanning."""
|
|
213
|
+
return child.is_dir() and not child.name.startswith(".") and child.name not in _PKG_EXCLUDE
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _list_packages(path: Path) -> list[str]:
|
|
217
|
+
"""Return one-level Python packages at the repo root or under ``src/``."""
|
|
218
|
+
out: list[str] = []
|
|
219
|
+
for child in sorted(path.iterdir()):
|
|
220
|
+
if not _is_candidate_pkg_dir(child):
|
|
221
|
+
continue
|
|
222
|
+
if (child / _INIT_PY).exists():
|
|
223
|
+
out.append(child.name + "/")
|
|
224
|
+
src = path / "src"
|
|
225
|
+
if src.is_dir():
|
|
226
|
+
for child in sorted(src.iterdir()):
|
|
227
|
+
if not _is_candidate_pkg_dir(child):
|
|
228
|
+
continue
|
|
229
|
+
if (child / _INIT_PY).exists():
|
|
230
|
+
out.append(f"src/{child.name}/")
|
|
231
|
+
return out
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _package_node(pkg_dir: Path, *, remaining_depth: int) -> dict[str, object]:
|
|
235
|
+
"""Build one tree node for *pkg_dir*; recurse into subpackages until depth exhausted."""
|
|
236
|
+
modules: list[str] = []
|
|
237
|
+
subpackages: list[dict[str, object]] = []
|
|
238
|
+
for child in sorted(pkg_dir.iterdir()):
|
|
239
|
+
if child.name.startswith(".") or child.name in _PKG_EXCLUDE:
|
|
240
|
+
continue
|
|
241
|
+
if child.is_file() and child.suffix == ".py":
|
|
242
|
+
modules.append(child.name)
|
|
243
|
+
continue
|
|
244
|
+
if child.is_dir() and (child / _INIT_PY).exists() and remaining_depth > 0:
|
|
245
|
+
subpackages.append(_package_node(child, remaining_depth=remaining_depth - 1))
|
|
246
|
+
return {"name": pkg_dir.name, "modules": modules, "subpackages": subpackages}
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _package_tree(path: Path, *, max_depth: int = 2) -> list[dict[str, object]]:
|
|
250
|
+
"""Return one node per top-level package with up to ``max_depth`` levels of subpackages.
|
|
251
|
+
|
|
252
|
+
Walks the same roots as :func:`_list_packages` (repo root + ``src/``) and
|
|
253
|
+
honors the same exclude set, so callers that consume both the flat
|
|
254
|
+
``package_layout`` and the nested ``package_tree`` see consistent contents.
|
|
255
|
+
|
|
256
|
+
``max_depth=2`` means: top-level package (e.g. ``demo/``) plus up to two
|
|
257
|
+
nested levels of subpackages (e.g. ``demo/cli/`` and ``demo/cli/_commands/``).
|
|
258
|
+
"""
|
|
259
|
+
out: list[dict[str, object]] = []
|
|
260
|
+
for child in sorted(path.iterdir()):
|
|
261
|
+
if not _is_candidate_pkg_dir(child):
|
|
262
|
+
continue
|
|
263
|
+
if (child / _INIT_PY).exists():
|
|
264
|
+
out.append(_package_node(child, remaining_depth=max_depth))
|
|
265
|
+
src = path / "src"
|
|
266
|
+
if src.is_dir():
|
|
267
|
+
for child in sorted(src.iterdir()):
|
|
268
|
+
if not _is_candidate_pkg_dir(child):
|
|
269
|
+
continue
|
|
270
|
+
if (child / _INIT_PY).exists():
|
|
271
|
+
out.append(_package_node(child, remaining_depth=max_depth))
|
|
272
|
+
return out
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _list_vendored_skills(path: Path) -> list[dict[str, str]]:
|
|
276
|
+
"""Return ``.claude/skills/*`` entries, augmented with provenance when present."""
|
|
277
|
+
skills_dir = path / ".claude" / "skills"
|
|
278
|
+
if not skills_dir.is_dir():
|
|
279
|
+
return []
|
|
280
|
+
skills: list[dict[str, str]] = []
|
|
281
|
+
for skill_dir in sorted(skills_dir.iterdir()):
|
|
282
|
+
if skill_dir.is_dir():
|
|
283
|
+
skills.append(
|
|
284
|
+
{
|
|
285
|
+
"name": skill_dir.name,
|
|
286
|
+
"path": f".claude/skills/{skill_dir.name}/",
|
|
287
|
+
}
|
|
288
|
+
)
|
|
289
|
+
provenance = _read_skill_sources(path)
|
|
290
|
+
for skill in skills:
|
|
291
|
+
if skill["name"] in provenance:
|
|
292
|
+
skill.update(provenance[skill["name"]])
|
|
293
|
+
return skills
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _unwrap_backticks(val: str) -> str:
|
|
297
|
+
"""Strip a *fully balanced* ```…``` pair from *val* and trim whitespace.
|
|
298
|
+
|
|
299
|
+
Cells with internal ```…``` spans (e.g.
|
|
300
|
+
```agentculture/steward` (`.claude/skills/cicd/`)``)
|
|
301
|
+
are left intact so the rendered markdown stays valid.
|
|
302
|
+
"""
|
|
303
|
+
v = val.strip()
|
|
304
|
+
if len(v) >= 2 and v.startswith("`") and v.endswith("`"):
|
|
305
|
+
return v[1:-1].strip()
|
|
306
|
+
return v
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _read_skill_sources(path: Path) -> dict[str, dict[str, str]]:
|
|
310
|
+
"""Parse ``docs/skill-sources.md`` table rows into ``{name: {source, version}}``."""
|
|
311
|
+
f = path / "docs" / "skill-sources.md"
|
|
312
|
+
if not f.exists():
|
|
313
|
+
return {}
|
|
314
|
+
out: dict[str, dict[str, str]] = {}
|
|
315
|
+
for line in f.read_text(encoding="utf-8").splitlines():
|
|
316
|
+
s = line.strip()
|
|
317
|
+
if not s.startswith("|") or "---" in s:
|
|
318
|
+
continue
|
|
319
|
+
parts = [p.strip() for p in s.strip("|").split("|")]
|
|
320
|
+
if len(parts) >= 2 and parts[0] and parts[1] and parts[0] not in {"name", "Skill"}:
|
|
321
|
+
key = _unwrap_backticks(parts[0])
|
|
322
|
+
if key.lower() in {"name", "skill"}:
|
|
323
|
+
continue
|
|
324
|
+
out[key] = {
|
|
325
|
+
"source": _unwrap_backticks(parts[1]),
|
|
326
|
+
"version": _unwrap_backticks(parts[2]) if len(parts) >= 3 else "",
|
|
327
|
+
}
|
|
328
|
+
return out
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _read_citations(path: Path) -> list[dict[str, str]]:
|
|
332
|
+
"""Parse ``CITATION.md`` rows into ``[{local, source_repo, sha}]``."""
|
|
333
|
+
f = path / "CITATION.md"
|
|
334
|
+
if not f.exists():
|
|
335
|
+
return []
|
|
336
|
+
out: list[dict[str, str]] = []
|
|
337
|
+
for line in f.read_text(encoding="utf-8").splitlines():
|
|
338
|
+
s = line.strip()
|
|
339
|
+
if not s.startswith("|") or "---" in s:
|
|
340
|
+
continue
|
|
341
|
+
parts = [p.strip() for p in s.strip("|").split("|")]
|
|
342
|
+
if len(parts) >= 3 and parts[0] and parts[1] and parts[2]:
|
|
343
|
+
first = _unwrap_backticks(parts[0]).lower()
|
|
344
|
+
if first.startswith("local") or first in {"path", "file"}:
|
|
345
|
+
continue
|
|
346
|
+
out.append(
|
|
347
|
+
{
|
|
348
|
+
"local": _unwrap_backticks(parts[0]),
|
|
349
|
+
"source_repo": _unwrap_backticks(parts[1]),
|
|
350
|
+
"sha": _unwrap_backticks(parts[2]),
|
|
351
|
+
}
|
|
352
|
+
)
|
|
353
|
+
return out
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def _is_changelog_summary_line(line: str) -> bool:
|
|
357
|
+
"""True when *line* is the first body line that should become an entry summary."""
|
|
358
|
+
body = line.strip()
|
|
359
|
+
if not body:
|
|
360
|
+
return False
|
|
361
|
+
return not body.startswith("#")
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
def _first_changelog_summary(body_lines: list[str]) -> str:
|
|
365
|
+
"""Return the first viable summary line from a slice of body lines, else ``""``."""
|
|
366
|
+
for line in body_lines:
|
|
367
|
+
if _is_changelog_summary_line(line):
|
|
368
|
+
return line.strip().lstrip("-").strip()
|
|
369
|
+
return ""
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _read_changelog(path: Path, *, n: int) -> list[dict[str, str]]:
|
|
373
|
+
"""Return up to ``n`` recent entries from ``CHANGELOG.md`` (Keep-a-Changelog).
|
|
374
|
+
|
|
375
|
+
Two-pass: collect heading indices first, then extract one summary line
|
|
376
|
+
per heading from the body slice between it and the next heading. This
|
|
377
|
+
keeps the per-function cognitive complexity small.
|
|
378
|
+
"""
|
|
379
|
+
f = path / "CHANGELOG.md"
|
|
380
|
+
if not f.exists():
|
|
381
|
+
return []
|
|
382
|
+
lines = f.read_text(encoding="utf-8").splitlines()
|
|
383
|
+
heading_positions = [(i, line) for i, line in enumerate(lines) if line.startswith("## ")][:n]
|
|
384
|
+
entries: list[dict[str, str]] = []
|
|
385
|
+
for idx, (start, heading_line) in enumerate(heading_positions):
|
|
386
|
+
entry = _parse_changelog_heading(heading_line)
|
|
387
|
+
next_heading = (
|
|
388
|
+
heading_positions[idx + 1][0] if idx + 1 < len(heading_positions) else len(lines)
|
|
389
|
+
)
|
|
390
|
+
entry["summary"] = _first_changelog_summary(lines[start + 1 : next_heading])
|
|
391
|
+
entries.append(entry)
|
|
392
|
+
return entries
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _parse_changelog_heading(line: str) -> dict[str, str]:
|
|
396
|
+
"""Extract version and date from a Keep-a-Changelog heading line."""
|
|
397
|
+
text = line[3:].strip()
|
|
398
|
+
if text.startswith("[") and "]" in text:
|
|
399
|
+
version = text[1 : text.index("]")]
|
|
400
|
+
rest = text[text.index("]") + 1 :].lstrip(" -")
|
|
401
|
+
return {"version": version, "date": rest.strip()}
|
|
402
|
+
parts = text.split()
|
|
403
|
+
version = parts[0] if parts else ""
|
|
404
|
+
date = parts[-1].strip("()") if len(parts) > 1 else ""
|
|
405
|
+
return {"version": version, "date": date}
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def _read_claude_md_section(path: Path, heading: str) -> str:
|
|
409
|
+
"""Return the body of a ``## Heading`` section from CLAUDE.md, stripped."""
|
|
410
|
+
f = path / "CLAUDE.md"
|
|
411
|
+
if not f.exists():
|
|
412
|
+
return ""
|
|
413
|
+
inside = False
|
|
414
|
+
out: list[str] = []
|
|
415
|
+
for line in f.read_text(encoding="utf-8").splitlines():
|
|
416
|
+
if line.strip() == heading:
|
|
417
|
+
inside = True
|
|
418
|
+
continue
|
|
419
|
+
if inside:
|
|
420
|
+
if line.startswith("## "):
|
|
421
|
+
break
|
|
422
|
+
out.append(line)
|
|
423
|
+
return "\n".join(out).strip()
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _read_culture_nick(path: Path) -> str:
|
|
427
|
+
"""Return ``agents[0].suffix`` (or ``.nick``) from ``culture.yaml`` if present."""
|
|
428
|
+
f = path / "culture.yaml"
|
|
429
|
+
if not f.exists():
|
|
430
|
+
return ""
|
|
431
|
+
try:
|
|
432
|
+
data = yaml.safe_load(f.read_text(encoding="utf-8")) or {}
|
|
433
|
+
except yaml.YAMLError:
|
|
434
|
+
return ""
|
|
435
|
+
agents = data.get("agents", [])
|
|
436
|
+
if not agents or not isinstance(agents[0], dict):
|
|
437
|
+
return ""
|
|
438
|
+
return str(agents[0].get("suffix") or agents[0].get("nick") or "")
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _build_test(pyproject: dict | None) -> dict | None:
|
|
442
|
+
"""Extract test/coverage/python metadata from a raw pyproject dict.
|
|
443
|
+
|
|
444
|
+
Returns a dict with some subset of ``test_command``, ``test_addopts``,
|
|
445
|
+
``coverage_fail_under``, and ``python_requires``. Keys whose value is
|
|
446
|
+
None are dropped. Returns None when *pyproject* is None.
|
|
447
|
+
"""
|
|
448
|
+
if pyproject is None:
|
|
449
|
+
return None
|
|
450
|
+
pytest_opts = pyproject.get("tool") or {}
|
|
451
|
+
pytest_addopts = ((pytest_opts.get("pytest") or {}).get("ini_options") or {}).get("addopts")
|
|
452
|
+
coverage_fail = ((pytest_opts.get("coverage") or {}).get("report") or {}).get("fail_under")
|
|
453
|
+
python_requires = (pyproject.get("project") or {}).get("requires-python")
|
|
454
|
+
result: dict = {"test_command": "pytest"}
|
|
455
|
+
if pytest_addopts is not None:
|
|
456
|
+
result["test_addopts"] = pytest_addopts
|
|
457
|
+
if coverage_fail is not None:
|
|
458
|
+
result["coverage_fail_under"] = coverage_fail
|
|
459
|
+
if python_requires is not None:
|
|
460
|
+
result["python_requires"] = python_requires
|
|
461
|
+
return result
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def _ci_workflows(path: Path) -> list[dict[str, str]]:
|
|
465
|
+
"""Scan ``.github/workflows/*.{yml,yaml}`` and return name + filename entries."""
|
|
466
|
+
workflows_dir = path / ".github" / "workflows"
|
|
467
|
+
if not workflows_dir.is_dir():
|
|
468
|
+
return []
|
|
469
|
+
out: list[dict[str, str]] = []
|
|
470
|
+
for wf_file in sorted(workflows_dir.iterdir()):
|
|
471
|
+
if wf_file.suffix not in {".yml", ".yaml"}:
|
|
472
|
+
continue
|
|
473
|
+
try:
|
|
474
|
+
text = wf_file.read_text(encoding="utf-8")
|
|
475
|
+
except OSError:
|
|
476
|
+
continue
|
|
477
|
+
m = _WORKFLOW_NAME_RE.search(text)
|
|
478
|
+
if m:
|
|
479
|
+
raw_name = m.group(1).strip()
|
|
480
|
+
# strip enclosing quotes
|
|
481
|
+
if len(raw_name) >= 2 and raw_name[0] in ('"', "'") and raw_name[-1] == raw_name[0]:
|
|
482
|
+
raw_name = raw_name[1:-1]
|
|
483
|
+
name = raw_name
|
|
484
|
+
else:
|
|
485
|
+
name = ""
|
|
486
|
+
out.append({"file": wf_file.name, "name": name})
|
|
487
|
+
return out
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
# (needle, label) pairs tried in priority order for both block and inline forms.
# ``_classify_trigger`` returns the label of the first pair whose needle is a
# substring of the text it is given, so earlier pairs win when several match.
|
|
491
|
+
_TRIGGER_NEEDLES = (
|
|
492
|
+
("tags:", "push: tags"),
|
|
493
|
+
("release", "release"),
|
|
494
|
+
("workflow_dispatch", "workflow_dispatch"),
|
|
495
|
+
("schedule", "schedule"),
|
|
496
|
+
("pull_request", "pull_request"),
|
|
497
|
+
("branches:", "push: branches"),
|
|
498
|
+
)
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def _classify_trigger(haystack: str) -> str | None:
    """Return the label of the first ``_TRIGGER_NEEDLES`` pair whose needle occurs in *haystack*.

    Pairs are checked in tuple order; ``None`` is returned when none match.
    """
    return next((label for needle, label in _TRIGGER_NEEDLES if needle in haystack), None)
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
def _summarize_on_block(text: str) -> str:
|
|
510
|
+
"""Coarse classifier for the ``on:`` block in a workflow file."""
|
|
511
|
+
block_re = re.compile(r"^on:\s*\n((?:[ \t]+.*\n?)*)", re.MULTILINE)
|
|
512
|
+
m = block_re.search(text)
|
|
513
|
+
if m:
|
|
514
|
+
return _classify_trigger(m.group(0)) or "unknown"
|
|
515
|
+
inline_re = re.compile(r"^on:\s*(.+)$", re.MULTILINE)
|
|
516
|
+
im = inline_re.search(text)
|
|
517
|
+
if im:
|
|
518
|
+
val = im.group(1).strip().lower()
|
|
519
|
+
if "push" in val and not _classify_trigger(val):
|
|
520
|
+
return "push: branches"
|
|
521
|
+
return _classify_trigger(val) or "unknown"
|
|
522
|
+
return "unknown"
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def _publish_target(path: Path) -> dict | None:
|
|
526
|
+
"""Detect the first PyPI/GHCR publish workflow; return kind/workflow/trigger or None."""
|
|
527
|
+
workflows_dir = path / ".github" / "workflows"
|
|
528
|
+
if not workflows_dir.is_dir():
|
|
529
|
+
return None
|
|
530
|
+
for wf_file in sorted(workflows_dir.iterdir()):
|
|
531
|
+
if wf_file.suffix not in {".yml", ".yaml"}:
|
|
532
|
+
continue
|
|
533
|
+
try:
|
|
534
|
+
text = wf_file.read_text(encoding="utf-8")
|
|
535
|
+
except OSError:
|
|
536
|
+
continue
|
|
537
|
+
if "pypa/gh-action-pypi-publish" in text or "pypi.org" in text:
|
|
538
|
+
kind = "pypi"
|
|
539
|
+
elif "ghcr.io" in text:
|
|
540
|
+
kind = "ghcr"
|
|
541
|
+
else:
|
|
542
|
+
continue
|
|
543
|
+
return {
|
|
544
|
+
"kind": kind,
|
|
545
|
+
"workflow": wf_file.name,
|
|
546
|
+
"trigger": _summarize_on_block(text),
|
|
547
|
+
}
|
|
548
|
+
return None
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _strip_url_userinfo(url: str) -> str:
    """Remove ``user[:secret]@`` from an ``http(s)://`` URL; return other URLs unchanged."""
    scheme, sep, rest = url.partition("://")
    if not sep or scheme not in {"http", "https"}:
        return url
    authority, slash, tail = rest.partition("/")
    return f"{scheme}://{authority.rpartition('@')[2]}{slash}{tail}"


def _git_remote(path: Path) -> dict | None:
    """Return parsed ``origin`` remote info from git, or None on failure.

    Runs ``git remote get-url origin`` in *path* with a 2 second timeout.
    Returns ``None`` when git cannot be started, times out, or exits
    non-zero.

    Credentials embedded in an http(s) URL (``https://user:token@host/...``)
    are removed before parsing, so they neither appear in the returned
    ``url`` nor get mistaken for the host/owner.

    When the URL matches ``_REMOTE_RE`` the result has ``host``, ``owner``,
    ``repo``, ``url`` and ``ref``. ``owner`` is the first path segment and
    ``repo`` is everything after it (``""`` if there is only one segment).
    Otherwise only ``url`` and ``ref`` are returned.
    """
    try:
        result = subprocess.run(  # noqa: S603,S607  # nosec B603 B607
            ["git", "remote", "get-url", "origin"],
            cwd=path,
            capture_output=True,
            text=True,
            check=False,
            timeout=2,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing git binary as well as an unusable ``cwd``.
        return None
    if result.returncode != 0:
        return None
    raw_url = _strip_url_userinfo(result.stdout.strip())
    m = _REMOTE_RE.match(raw_url)
    if not m:
        return {"url": raw_url, "ref": "origin"}
    owner, _, repo_name = m.group(2).partition("/")
    return {
        "host": m.group(1),
        "owner": owner,
        "repo": repo_name,
        "url": raw_url,
        "ref": "origin",
    }
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def _collect_module_files(node: dict, base_path: Path, pkg_path: Path) -> list[tuple[str, Path]]:
|
|
581
|
+
"""Recursively collect (relative_path_str, abs_path) pairs from a package_tree node."""
|
|
582
|
+
results: list[tuple[str, Path]] = []
|
|
583
|
+
for mod in node.get("modules") or []:
|
|
584
|
+
rel = pkg_path / mod
|
|
585
|
+
abs_path = base_path / rel
|
|
586
|
+
results.append((str(rel), abs_path))
|
|
587
|
+
for sub in node.get("subpackages") or []:
|
|
588
|
+
sub_pkg_path = pkg_path / sub["name"]
|
|
589
|
+
results.extend(_collect_module_files(sub, base_path, sub_pkg_path))
|
|
590
|
+
return results
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def _module_docs(path: Path, package_tree: list[dict]) -> list[dict]:
|
|
594
|
+
"""Return first-docstring-line summaries for modules in the package tree."""
|
|
595
|
+
out: list[dict] = []
|
|
596
|
+
for node in package_tree:
|
|
597
|
+
pkg_root = Path(node["name"])
|
|
598
|
+
# Check if this package lives under src/
|
|
599
|
+
candidate_src = path / "src" / node["name"]
|
|
600
|
+
if candidate_src.is_dir():
|
|
601
|
+
base_path = path / "src"
|
|
602
|
+
else:
|
|
603
|
+
base_path = path
|
|
604
|
+
pkg_path = pkg_root
|
|
605
|
+
for rel_str, abs_path in _collect_module_files(node, base_path, pkg_path):
|
|
606
|
+
try:
|
|
607
|
+
source = abs_path.read_text(encoding="utf-8")
|
|
608
|
+
tree = ast.parse(source)
|
|
609
|
+
except (SyntaxError, OSError, UnicodeDecodeError):
|
|
610
|
+
continue
|
|
611
|
+
docstring = ast.get_docstring(tree)
|
|
612
|
+
if not docstring:
|
|
613
|
+
continue
|
|
614
|
+
first_line = docstring.strip().splitlines()[0].strip()
|
|
615
|
+
if not first_line:
|
|
616
|
+
continue
|
|
617
|
+
out.append({"module": rel_str, "summary": first_line[:120]})
|
|
618
|
+
out.sort(key=lambda x: x["module"])
|
|
619
|
+
return out
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
# CLAUDE.md headings that ``_heading_is_design`` accepts by exact match.
_DEEP_HEADINGS = ("## Project Status", "## Architecture")
|
|
623
|
+
# Substrings that, when found in a lower-cased heading, also make
# ``_heading_is_design`` accept it.
_DEEP_KEYWORDS = ("invariant", "rule", "contract")
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
def profile_deep(path: Path, *, basic: bool = False) -> dict[str, object]:
    """Return the shallow profile extended with the deep-only fields.

    Adds ``readme_intro``, ``claude_md_sections`` and ``commits_recent`` (the
    last 10 commit subjects) to the result of :func:`profile_shallow`;
    ``basic`` is forwarded to it unchanged.
    """
    profile = profile_shallow(path, basic=basic)
    profile.update(
        readme_intro=_read_readme_intro(path),
        claude_md_sections=_read_claude_md_design_sections(path),
        commits_recent=_read_recent_commits(path, n=10),
    )
    return profile
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _read_readme_intro(path: Path) -> str:
|
|
636
|
+
"""Return the first non-heading paragraph of ``README.md``."""
|
|
637
|
+
f = path / "README.md"
|
|
638
|
+
if not f.exists():
|
|
639
|
+
return ""
|
|
640
|
+
out: list[str] = []
|
|
641
|
+
saw_content = False
|
|
642
|
+
for line in f.read_text(encoding="utf-8").splitlines():
|
|
643
|
+
if line.startswith("#"):
|
|
644
|
+
if saw_content:
|
|
645
|
+
break
|
|
646
|
+
continue
|
|
647
|
+
if not line.strip():
|
|
648
|
+
if saw_content:
|
|
649
|
+
break
|
|
650
|
+
continue
|
|
651
|
+
saw_content = True
|
|
652
|
+
out.append(line.rstrip())
|
|
653
|
+
return "\n".join(out).strip()
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
def _read_claude_md_design_sections(path: Path) -> str:
|
|
657
|
+
"""Return concatenated text of design-related ``## ...`` sections in CLAUDE.md."""
|
|
658
|
+
f = path / "CLAUDE.md"
|
|
659
|
+
if not f.exists():
|
|
660
|
+
return ""
|
|
661
|
+
chunks: list[str] = []
|
|
662
|
+
current_heading: str | None = None
|
|
663
|
+
current_body: list[str] = []
|
|
664
|
+
for line in f.read_text(encoding="utf-8").splitlines():
|
|
665
|
+
if line.startswith("## "):
|
|
666
|
+
if current_heading and _heading_is_design(current_heading):
|
|
667
|
+
chunks.append(current_heading + "\n" + "\n".join(current_body).rstrip())
|
|
668
|
+
current_heading = line.strip()
|
|
669
|
+
current_body = []
|
|
670
|
+
continue
|
|
671
|
+
current_body.append(line)
|
|
672
|
+
if current_heading and _heading_is_design(current_heading):
|
|
673
|
+
chunks.append(current_heading + "\n" + "\n".join(current_body).rstrip())
|
|
674
|
+
return "\n\n".join(chunks).strip()
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
def _heading_is_design(heading: str) -> bool:
    """Return True when *heading* marks a design section.

    A heading qualifies if it equals one of ``_DEEP_HEADINGS`` or if its
    lower-cased text contains any of ``_DEEP_KEYWORDS``.
    """
    if heading in _DEEP_HEADINGS:
        return True
    lowered = heading.lower()
    for keyword in _DEEP_KEYWORDS:
        if keyword in lowered:
            return True
    return False
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
def _read_recent_commits(path: Path, *, n: int) -> list[str]:
|
|
686
|
+
"""Return up to ``n`` recent commit subjects via ``git log`` (empty list if no git)."""
|
|
687
|
+
if not (path / ".git").exists():
|
|
688
|
+
return []
|
|
689
|
+
try:
|
|
690
|
+
result = subprocess.run( # noqa: S603,S607 # nosec B603 B607
|
|
691
|
+
["git", "-C", str(path), "log", f"-{n}", "--pretty=format:%s"],
|
|
692
|
+
capture_output=True,
|
|
693
|
+
text=True,
|
|
694
|
+
check=False,
|
|
695
|
+
)
|
|
696
|
+
except FileNotFoundError:
|
|
697
|
+
return []
|
|
698
|
+
if result.returncode != 0:
|
|
699
|
+
return []
|
|
700
|
+
return [line for line in result.stdout.splitlines() if line.strip()]
|