kata-cli 0.7.0 (kata_cli-0.7.0-py3-none-any.whl)
- kata_cli-0.7.0.dist-info/METADATA +36 -0
- kata_cli-0.7.0.dist-info/RECORD +33 -0
- kata_cli-0.7.0.dist-info/WHEEL +4 -0
- kata_cli-0.7.0.dist-info/entry_points.txt +3 -0
- kata_cli-0.7.0.dist-info/licenses/LICENSE +21 -0
- seer/__init__.py +34 -0
- seer/__main__.py +8 -0
- seer/cli/__init__.py +117 -0
- seer/cli/_commands/__init__.py +1 -0
- seer/cli/_commands/classify.py +40 -0
- seer/cli/_commands/explain.py +44 -0
- seer/cli/_commands/grep.py +44 -0
- seer/cli/_commands/learn.py +49 -0
- seer/cli/_commands/recent.py +52 -0
- seer/cli/_commands/whoami.py +42 -0
- seer/cli/_errors.py +59 -0
- seer/cli/_output.py +47 -0
- seer/lookup/__init__.py +25 -0
- seer/lookup/ast_scope.py +74 -0
- seer/lookup/classify.py +301 -0
- seer/lookup/grep_context.py +160 -0
- seer/lookup/recent_outline.py +304 -0
- seer/lookup/render.py +41 -0
- seer/repo/__init__.py +9 -0
- seer/repo/__main__.py +228 -0
- seer/repo/config.py +57 -0
- seer/repo/connections.py +298 -0
- seer/repo/detect.py +86 -0
- seer/repo/errors.py +81 -0
- seer/repo/graph.py +182 -0
- seer/repo/manifest.py +36 -0
- seer/repo/profile.py +700 -0
- seer/repo/render.py +470 -0
seer/lookup/__init__.py
ADDED
@@ -0,0 +1,25 @@
"""seer.lookup — codebase classification + lookup verbs.

This package is the sibling of `seer.repo`: it answers "what kind of project
is this?" / "where is X?" rather than "tell me about this repo."
"""

from __future__ import annotations

from seer.lookup.ast_scope import Scope, find_enclosing, list_symbols
from seer.lookup.classify import classify
from seer.lookup.grep_context import grep_with_context, render_grep_markdown
from seer.lookup.recent_outline import recent_with_outline, render_recent_markdown
from seer.lookup.render import render_classify_markdown

__all__ = [
    "classify",
    "find_enclosing",
    "grep_with_context",
    "list_symbols",
    "recent_with_outline",
    "render_classify_markdown",
    "render_grep_markdown",
    "render_recent_markdown",
    "Scope",
]
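A minimal usage sketch of the public surface re-exported above (illustrative only, not shipped in the wheel; the checkout path and search pattern are assumptions):

```python
from pathlib import Path

from seer.lookup import classify, grep_with_context

repo = Path("example-repo")  # hypothetical local checkout
info = classify(repo)
print(info["language"], [t["name"] for t in info["tags"]])

hits = grep_with_context("TODO", repo)  # requires ripgrep (rg) on PATH
print(f'{len(hits["matches"])} matches for {hits["pattern"]!r}')
```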
seer/lookup/ast_scope.py
ADDED
@@ -0,0 +1,74 @@
"""seer.lookup.ast_scope — AST-based scope resolver (stdlib ast only).

Provides:
    Scope — frozen dataclass describing a named code scope.
    list_symbols — collect all module-level + class-method scopes.
    find_enclosing — smallest scope whose line range contains a given line.
"""

from __future__ import annotations

import ast
from dataclasses import dataclass

__all__ = ["Scope", "list_symbols", "find_enclosing"]


@dataclass(frozen=True)
class Scope:
    kind: str  # "function" | "async_function" | "class"
    name: str  # qualified, e.g. "Foo.method_a"
    start_line: int
    end_line: int


def _scope_kind(node: ast.AST) -> str | None:
    """Return the scope kind string for *node*, or ``None`` if not a named scope."""
    if isinstance(node, ast.AsyncFunctionDef):
        return "async_function"
    if isinstance(node, ast.ClassDef):
        return "class"
    if isinstance(node, ast.FunctionDef):
        return "function"
    return None


def list_symbols(tree: ast.AST) -> list[Scope]:
    """Walk *tree* and return one :class:`Scope` per named scope.

    Covers:
    - Module-level functions, async functions, and classes.
    - Methods defined directly inside a class (one level of nesting per
      class, but classes inside classes recurse so ``Outer.Inner.method``
      is emitted correctly).

    Does **not** recurse into function bodies.
    """
    out: list[Scope] = []

    def visit(node: ast.AST, prefix: str = "") -> None:
        for child in ast.iter_child_nodes(node):
            kind = _scope_kind(child)
            if kind is None:
                continue
            name = f"{prefix}{child.name}"  # type: ignore[attr-defined]
            end = child.end_lineno or child.lineno  # type: ignore[attr-defined]
            start = child.lineno  # type: ignore[attr-defined]
            out.append(Scope(kind=kind, name=name, start_line=start, end_line=end))
            if isinstance(child, ast.ClassDef):
                visit(child, prefix=f"{name}.")

    visit(tree)
    return out


def find_enclosing(tree: ast.AST, line: int) -> Scope | None:
    """Return the smallest :class:`Scope` whose ``[start_line, end_line]``
    contains *line*, or ``None`` for module-level lines.
    """
    best: Scope | None = None
    for s in list_symbols(tree):
        if s.start_line <= line <= s.end_line:
            if best is None or (s.end_line - s.start_line) < (best.end_line - best.start_line):
                best = s
    return best
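A small sketch of the scope resolver on an in-memory snippet (illustrative; the sample source string is an assumption):

```python
import ast

from seer.lookup.ast_scope import find_enclosing, list_symbols

src = "class Foo:\n    def method_a(self):\n        return 1\n"
tree = ast.parse(src)

print([s.name for s in list_symbols(tree)])  # ['Foo', 'Foo.method_a']
print(find_enclosing(tree, 3))               # smallest scope containing line 3: Foo.method_a
```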
seer/lookup/classify.py
ADDED
@@ -0,0 +1,301 @@
"""Project-type classifier.

`classify(path)` returns a dict with `path`, `manifest`, `language`, and
`tags` (a list of `{name, evidence}` dicts). Per-tag rules are pure
functions of a `_Context` snapshot — one filesystem walk per call.
"""

from __future__ import annotations

import json
import tomllib
from dataclasses import dataclass, field
from pathlib import Path

from seer.cli._errors import EXIT_ENV_ERROR, EXIT_USER_ERROR, SeerError
from seer.repo.errors import malformed_pyproject


@dataclass
class _Context:
    """Filesystem snapshot consumed by per-tag rules. One walk per classify() call."""

    path: Path
    pyproject: dict | None = None
    package_json: dict | None = None
    bash_scripts: list[Path] = field(default_factory=list)
    has_dockerfile: bool = False
    has_compose: bool = False
    compose_filename: str | None = None
    has_tests_dir: bool = False
    workflow_files: list[Path] = field(default_factory=list)
    has_culture_yaml: bool = False


_COMPOSE_FILENAMES = ("docker-compose.yml", "docker-compose.yaml", "compose.yml", "compose.yaml")


def _load_pyproject(path: Path) -> dict | None:
    """Parse `path/pyproject.toml` or return None if absent.

    Raises `SeerError(EXIT_ENV_ERROR)` if the file exists but is unreadable
    (OS error, non-UTF8) or malformed (invalid TOML).
    """
    pyproject = path / "pyproject.toml"
    if not pyproject.exists():
        return None
    try:
        text = pyproject.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        raise _pyproject_unreadable_error(pyproject, str(e)) from e
    try:
        return tomllib.loads(text)
    except tomllib.TOMLDecodeError as e:
        raise malformed_pyproject(pyproject, str(e)) from e


def _load_package_json(path: Path) -> dict | None:
    """Parse `path/package.json` or return None if absent / unreadable / malformed.

    Soft-fails on any read/decode/parse error — Node tools handle missing or
    bad manifests gracefully and we follow the same "fail-soft for optional
    sources" pattern here.
    """
    package_json = path / "package.json"
    if not package_json.exists():
        return None
    try:
        return json.loads(package_json.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        return None


def _detect_compose(path: Path) -> str | None:
    """Return the first matching compose filename present at *path*, or None."""
    for compose in _COMPOSE_FILENAMES:
        if (path / compose).exists():
            return compose
    return None


def _build_context(path: Path) -> _Context:
    """Walk *path* once and capture every signal the rule set needs."""
    ctx = _Context(path=path)
    ctx.pyproject = _load_pyproject(path)
    ctx.package_json = _load_package_json(path)

    scripts_dir = path / "scripts"
    if scripts_dir.is_dir():
        ctx.bash_scripts = sorted(p for p in scripts_dir.iterdir() if p.suffix == ".sh")

    ctx.has_dockerfile = (path / "Dockerfile").exists()
    compose = _detect_compose(path)
    if compose is not None:
        ctx.has_compose = True
        ctx.compose_filename = compose

    ctx.has_culture_yaml = (path / "culture.yaml").exists()
    ctx.has_tests_dir = (path / "tests").is_dir()

    workflows_dir = path / ".github" / "workflows"
    if workflows_dir.is_dir():
        ctx.workflow_files = sorted(
            p for p in workflows_dir.iterdir() if p.suffix in (".yml", ".yaml")
        )

    return ctx


def _rule_python(ctx: _Context) -> dict[str, str] | None:
    if ctx.pyproject is None:
        return None
    return {"name": "python", "evidence": "pyproject.toml present"}


def _rule_node(ctx: _Context) -> dict[str, str] | None:
    if ctx.package_json is None:
        return None
    return {"name": "node", "evidence": "package.json present"}


def _rule_bash(ctx: _Context) -> dict[str, str] | None:
    if ctx.pyproject is not None or ctx.package_json is not None:
        return None
    if not ctx.bash_scripts:
        return None
    n = len(ctx.bash_scripts)
    file_word = "file" if n == 1 else "files"
    return {
        "name": "bash",
        "evidence": f"scripts/ contains {n} .sh {file_word}; no Python/Node manifest",
    }


def _rule_cli(ctx: _Context) -> dict[str, str] | None:
    # Python: [project.scripts] non-empty.
    if ctx.pyproject is not None:
        scripts = (ctx.pyproject.get("project", {}) or {}).get("scripts", {}) or {}
        if scripts:
            entries = ", ".join(f'{k} = "{v}"' for k, v in scripts.items())
            return {"name": "cli", "evidence": f"[project.scripts] defines {entries}"}
    # Node: package.json `bin` non-empty (object or string).
    if ctx.package_json is not None:
        bin_field = ctx.package_json.get("bin")
        if bin_field:
            if isinstance(bin_field, dict):
                names = ", ".join(bin_field.keys())
            else:
                names = ctx.package_json.get("name", "<unnamed>")
            return {"name": "cli", "evidence": f"package.json bin defines {names}"}
    return None


def _rule_library(ctx: _Context) -> dict[str, str] | None:
    """Importable Python package: `<name>/__init__.py` or `src/<name>/__init__.py`."""
    if ctx.pyproject is None:
        return None
    name = (ctx.pyproject.get("project", {}) or {}).get("name")
    if not name:
        return None
    # PyPI normalises hyphen vs underscore; check both possible package dir names.
    candidates = [name, name.replace("-", "_")]
    for candidate in candidates:
        flat = ctx.path / candidate / "__init__.py"
        if flat.exists():
            return {"name": "library", "evidence": f"`{candidate}/__init__.py` present"}
        nested = ctx.path / "src" / candidate / "__init__.py"
        if nested.exists():
            return {"name": "library", "evidence": f"`src/{candidate}/__init__.py` present"}
    return None


def _rule_dockerized(ctx: _Context) -> dict[str, str] | None:
    if ctx.has_dockerfile:
        return {"name": "dockerized", "evidence": "Dockerfile present"}
    if ctx.has_compose and ctx.compose_filename:
        return {"name": "dockerized", "evidence": f"{ctx.compose_filename} present"}
    return None


def _rule_tested(ctx: _Context) -> dict[str, str] | None:
    if not ctx.has_tests_dir:
        return None
    # Python path: pytest in [dependency-groups] dev
    if ctx.pyproject is not None:
        dev_deps = (ctx.pyproject.get("dependency-groups", {}) or {}).get("dev", []) or []
        # Strip version spec: pytest>=8.0 -> pytest; pytest==8.1 -> pytest; etc.
        dep_names = {d.split(">=")[0].split("==")[0].split("~=")[0].strip() for d in dev_deps}
        if "pytest" in dep_names:
            return {
                "name": "tested",
                "evidence": "tests/ exists; pytest in dependency-groups.dev",
            }
    # Node path: scripts.test defined
    if ctx.package_json is not None:
        scripts = ctx.package_json.get("scripts", {}) or {}
        if scripts.get("test"):
            return {
                "name": "tested",
                "evidence": f"tests/ exists; package.json scripts.test = {scripts['test']!r}",
            }
    return None


def _rule_packaged_pypi(ctx: _Context) -> dict[str, str] | None:
    needles = ("pypi.org", "pypa/gh-action-pypi-publish")
    for wf in ctx.workflow_files:
        try:
            text = wf.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best-effort: skip unreadable / undecodable workflow files
            # rather than aborting classification.
            continue
        if any(needle in text for needle in needles):
            return {
                "name": "packaged-pypi",
                "evidence": f".github/workflows/{wf.name} uploads to pypi.org",
            }
    return None


def _rule_agentculture_sibling(ctx: _Context) -> dict[str, str] | None:
    if ctx.has_culture_yaml:
        return {"name": "agentculture-sibling", "evidence": "culture.yaml present"}
    return None


_RULES = [
    _rule_python,
    _rule_node,
    _rule_bash,
    _rule_cli,
    _rule_library,
    _rule_dockerized,
    _rule_tested,
    _rule_packaged_pypi,
    _rule_agentculture_sibling,
]


def _path_not_found_error(p: Path) -> SeerError:
    return SeerError(
        code=EXIT_USER_ERROR,
        kind="user_error",
        message=f"path not found: {p}",
        reason="classify expected a directory path that exists on disk.",
        remediation="check the path argument and retry.",
    )


def _path_not_a_directory_error(p: Path) -> SeerError:
    return SeerError(
        code=EXIT_USER_ERROR,
        kind="user_error",
        message=f"classify expects a directory, got file: {p}",
        reason="classify operates on a repository root, not a single file.",
        remediation="pass the parent directory.",
    )


def _pyproject_unreadable_error(p: Path, detail: str) -> SeerError:
    return SeerError(
        code=EXIT_ENV_ERROR,
        kind="env_error",
        message=f"cannot read pyproject.toml at {p}",
        reason=f"OS or decode error while reading the manifest: {detail}",
        remediation=("check file permissions and confirm the file is valid UTF-8."),
    )


def classify(path: Path) -> dict[str, object]:
    """Return `{path, manifest, language, tags}` for the repo at *path*."""
    if not path.exists():
        raise _path_not_found_error(path)
    if not path.is_dir():
        raise _path_not_a_directory_error(path)

    ctx = _build_context(path)
    tags: list[dict[str, str]] = []
    for rule in _RULES:
        result = rule(ctx)
        if result is not None:
            tags.append(result)

    # Manifest + language derivation. Python wins over Node when both present
    # (see spec — polyglot caller should read the tag list, not the scalar).
    if ctx.pyproject is not None:
        manifest: str | None = "pyproject.toml"
        language = "python"
    elif ctx.package_json is not None:
        manifest = "package.json"
        language = "node"
    else:
        manifest = None
        language = "unknown"

    return {
        "path": str(path),
        "manifest": manifest,
        "language": language,
        "tags": tags,
    }
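A hedged sketch of calling the classifier and the dict shape it returns (illustrative; the repo path is an assumption and the tags actually emitted depend on what is on disk):

```python
from pathlib import Path

from seer.lookup.classify import classify

result = classify(Path("example-repo"))  # hypothetical repo root
# Shape per the docstring above:
# {"path": "...", "manifest": "pyproject.toml" | "package.json" | None,
#  "language": "python" | "node" | "unknown",
#  "tags": [{"name": "...", "evidence": "..."}, ...]}
for tag in result["tags"]:
    print(f'{tag["name"]}: {tag["evidence"]}')
```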
seer/lookup/grep_context.py
ADDED
@@ -0,0 +1,160 @@
"""seer.lookup.grep_context — ripgrep-backed search with AST scope annotation.

Provides:
    grep_with_context — run ``rg --json`` and pair every match with the
        enclosing Python scope from the AST resolver.
    render_grep_markdown — format grep results as a Markdown table.
"""

from __future__ import annotations

import ast
import json
import subprocess  # noqa: S404 # nosec B404
from pathlib import Path
from typing import Any

from seer.cli._errors import EXIT_ENV_ERROR, EXIT_USER_ERROR, SeerError
from seer.lookup.ast_scope import find_enclosing

__all__ = ["grep_with_context", "render_grep_markdown"]


def _run_rg(pattern: str, path: Path) -> "subprocess.CompletedProcess[str]":
    """Run ``rg --json``; raise :class:`SeerError` on missing binary or exit code 2+."""
    try:
        result = subprocess.run(  # noqa: S603,S607 # nosec B603 B607
            ["rg", "--json", pattern, str(path)],
            capture_output=True,
            text=True,
            check=False,
            timeout=30,
        )
    except FileNotFoundError:
        raise SeerError(
            code=EXIT_ENV_ERROR,
            kind="env_error",
            message="`rg` not found on PATH",
            reason="seer grep requires ripgrep (rg) for match-finding.",
            remediation=("install ripgrep (e.g. `apt install ripgrep` or `brew install ripgrep`)."),
        )
    except subprocess.SubprocessError as exc:
        raise SeerError(
            code=EXIT_ENV_ERROR,
            kind="env_error",
            message=f"rg subprocess failed: {exc}",
        )

    # rg exits 1 when there are no matches — that is not an error.
    if result.returncode >= 2:
        stderr_snippet = result.stderr.strip()[:200]
        raise SeerError(
            code=EXIT_ENV_ERROR,
            kind="env_error",
            message=f"rg exited with code {result.returncode}",
            reason=stderr_snippet or "rg reported an error.",
            remediation="check that the pattern is a valid regex and the path is readable.",
        )

    return result


def _parse_rg_matches(stdout: str) -> "dict[str, list[dict[str, Any]]]":
    """Group ``rg --json`` match events by file, preserving emission order."""
    matches_by_file: dict[str, list[dict[str, Any]]] = {}
    for raw_line in stdout.splitlines():
        raw_line = raw_line.strip()
        if not raw_line:
            continue
        try:
            event = json.loads(raw_line)
        except json.JSONDecodeError:
            continue
        if event.get("type") != "match":
            continue
        data = event.get("data", {})
        file_str = (data.get("path") or {}).get("text", "")
        line_num = data.get("line_number", 0)
        line_text = ((data.get("lines") or {}).get("text") or "").rstrip("\n")
        entry: dict[str, Any] = {
            "file": file_str,
            "line": line_num,
            "scope": None,  # filled in by _annotate_scopes
            "text": line_text,
        }
        matches_by_file.setdefault(file_str, []).append(entry)
    return matches_by_file


def _annotate_scopes(file_str: str, entries: "list[dict[str, Any]]") -> None:
    """Resolve the enclosing Python scope for each entry in *entries* (in-place)."""
    if not file_str.endswith(".py"):
        return
    try:
        source = Path(file_str).read_text(encoding="utf-8")
        tree = ast.parse(source)
    except (SyntaxError, OSError, UnicodeDecodeError):
        # Best-effort: leave scope=None for all matches in this file.
        return
    for entry in entries:
        scope_obj = find_enclosing(tree, entry["line"])
        entry["scope"] = scope_obj.name if scope_obj else None


def grep_with_context(pattern: str, path: str | Path) -> dict[str, Any]:
    """Search *path* for *pattern* via ``rg --json`` and annotate each match.

    Each match in the returned dict has the shape::

        {"file": str, "line": int, "scope": str | None, "text": str}

    ``scope`` is the qualified name of the enclosing function / method / class
    for Python files (``None`` for module-level lines and all non-Python files).

    Raises:
        SeerError(EXIT_USER_ERROR): *path* does not exist.
        SeerError(EXIT_ENV_ERROR): ``rg`` is not on PATH, or rg exits with
            code 2+ (real error, not "no matches").
    """
    p = Path(path)
    if not p.exists():
        raise SeerError(
            code=EXIT_USER_ERROR,
            kind="user_error",
            message=f"path not found: {path}",
            remediation="pass an existing file or directory.",
        )

    result = _run_rg(pattern, p)
    matches_by_file = _parse_rg_matches(result.stdout)

    for file_str, entries in matches_by_file.items():
        _annotate_scopes(file_str, entries)

    all_matches = [entry for entries in matches_by_file.values() for entry in entries]
    return {"pattern": pattern, "matches": all_matches}


def render_grep_markdown(data: dict[str, Any]) -> str:
    """Render a :func:`grep_with_context` result dict as a Markdown table."""
    lines: list[str] = []
    pattern = data.get("pattern", "")
    matches = data.get("matches") or []

    lines.append(f"# grep: `{pattern}`")
    lines.append("")

    if not matches:
        lines.append("_No matches found._")
        return "\n".join(lines) + "\n"

    lines.append("| File | Line | Scope | Text |")
    lines.append("|---|---|---|---|")
    for m in matches:
        file_cell = m.get("file", "")
        line_cell = str(m.get("line", ""))
        scope_cell = m.get("scope") or "_module_"
        text_cell = (m.get("text") or "").replace("|", "\\|")
        lines.append(f"| {file_cell} | {line_cell} | {scope_cell} | {text_cell} |")

    return "\n".join(lines) + "\n"
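A short end-to-end sketch of the ripgrep-backed search (illustrative; assumes `rg` is installed and the path below exists):

```python
from seer.lookup.grep_context import grep_with_context, render_grep_markdown

data = grep_with_context(r"def classify", "example-repo")  # hypothetical checkout
for m in data["matches"]:
    scope = m["scope"] or "module"
    print(f'{m["file"]}:{m["line"]} [{scope}] {m["text"]}')

print(render_grep_markdown(data))  # same matches rendered as a Markdown table
```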