kata-cli 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ """seer.lookup — codebase classification + lookup verbs.
2
+
3
+ This package is the sibling of `seer.repo`: it answers "what kind of project
4
+ is this?" / "where is X?" rather than "tell me about this repo."
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from seer.lookup.ast_scope import Scope, find_enclosing, list_symbols
10
+ from seer.lookup.classify import classify
11
+ from seer.lookup.grep_context import grep_with_context, render_grep_markdown
12
+ from seer.lookup.recent_outline import recent_with_outline, render_recent_markdown
13
+ from seer.lookup.render import render_classify_markdown
14
+
15
# Public API of seer.lookup, kept sorted case-insensitively; re-exports the
# lookup verbs plus the Scope dataclass from the submodules imported above.
__all__ = [
    "classify",
    "find_enclosing",
    "grep_with_context",
    "list_symbols",
    "recent_with_outline",
    "render_classify_markdown",
    "render_grep_markdown",
    "render_recent_markdown",
    "Scope",
]
@@ -0,0 +1,74 @@
1
+ """seer.lookup.ast_scope — AST-based scope resolver (stdlib ast only).
2
+
3
+ Provides:
4
+ Scope — frozen dataclass describing a named code scope.
5
+ list_symbols — collect all module-level + class-method scopes.
6
+ find_enclosing — smallest scope whose line range contains a given line.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import ast
12
+ from dataclasses import dataclass
13
+
14
+ __all__ = ["Scope", "list_symbols", "find_enclosing"]
15
+
16
+
17
@dataclass(frozen=True)
class Scope:
    """A named code scope (function, async function, or class) and its line span."""

    kind: str  # "function" | "async_function" | "class"
    name: str  # qualified, e.g. "Foo.method_a"
    start_line: int
    end_line: int


def _scope_kind(node: ast.AST) -> str | None:
    """Map *node* to its scope-kind label, or ``None`` when it is not a named scope."""
    for node_type, label in (
        (ast.AsyncFunctionDef, "async_function"),
        (ast.ClassDef, "class"),
        (ast.FunctionDef, "function"),
    ):
        if isinstance(node, node_type):
            return label
    return None


def list_symbols(tree: ast.AST) -> list[Scope]:
    """Collect one :class:`Scope` per named scope in *tree*.

    Module-level functions, async functions, and classes are included, as are
    methods declared directly inside a class. Nested classes recurse, so a
    qualified name like ``Outer.Inner.method`` is produced correctly.
    Function bodies are never descended into.
    """
    scopes: list[Scope] = []

    def _collect(parent: ast.AST, qual_prefix: str = "") -> None:
        for node in ast.iter_child_nodes(parent):
            kind = _scope_kind(node)
            if kind is None:
                continue
            qualname = f"{qual_prefix}{node.name}"  # type: ignore[attr-defined]
            scopes.append(
                Scope(
                    kind=kind,
                    name=qualname,
                    start_line=node.lineno,  # type: ignore[attr-defined]
                    # end_lineno can be None on synthetic nodes; fall back to the start.
                    end_line=node.end_lineno or node.lineno,  # type: ignore[attr-defined]
                )
            )
            if isinstance(node, ast.ClassDef):
                _collect(node, qual_prefix=f"{qualname}.")

    _collect(tree)
    return scopes


def find_enclosing(tree: ast.AST, line: int) -> Scope | None:
    """Return the tightest :class:`Scope` whose span contains *line*.

    ``None`` means the line sits at module level, outside every named scope.
    """
    smallest: Scope | None = None
    for candidate in list_symbols(tree):
        if not (candidate.start_line <= line <= candidate.end_line):
            continue
        if smallest is None or (
            candidate.end_line - candidate.start_line
            < smallest.end_line - smallest.start_line
        ):
            smallest = candidate
    return smallest
@@ -0,0 +1,301 @@
1
+ """Project-type classifier.
2
+
3
+ `classify(path)` returns a dict with `path`, `manifest`, `language`, and
4
+ `tags` (a list of `{name, evidence}` dicts). Per-tag rules are pure
5
+ functions of a `_Context` snapshot — one filesystem walk per call.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import tomllib
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+
15
+ from seer.cli._errors import EXIT_ENV_ERROR, EXIT_USER_ERROR, SeerError
16
+ from seer.repo.errors import malformed_pyproject
17
+
18
+
19
@dataclass
class _Context:
    """Filesystem snapshot consumed by per-tag rules. One walk per classify() call."""

    path: Path  # repository root being classified
    pyproject: dict | None = None  # parsed pyproject.toml, or None when absent
    package_json: dict | None = None  # parsed package.json; None when absent/unreadable/malformed
    bash_scripts: list[Path] = field(default_factory=list)  # sorted scripts/*.sh paths
    has_dockerfile: bool = False  # Dockerfile exists at the root
    has_compose: bool = False  # one of the recognised compose filenames exists at the root
    compose_filename: str | None = None  # which compose filename matched, if any
    has_tests_dir: bool = False  # tests/ directory exists at the root
    workflow_files: list[Path] = field(default_factory=list)  # sorted .github/workflows/*.yml|*.yaml
    has_culture_yaml: bool = False  # culture.yaml exists at the root
33
+
34
+
35
# Compose filenames recognised by _detect_compose, probed in this order;
# the first one present at the repo root wins.
_COMPOSE_FILENAMES = ("docker-compose.yml", "docker-compose.yaml", "compose.yml", "compose.yaml")
36
+
37
+
38
+ def _load_pyproject(path: Path) -> dict | None:
39
+ """Parse `path/pyproject.toml` or return None if absent.
40
+
41
+ Raises `SeerError(EXIT_ENV_ERROR)` if the file exists but is unreadable
42
+ (OS error, non-UTF8) or malformed (invalid TOML).
43
+ """
44
+ pyproject = path / "pyproject.toml"
45
+ if not pyproject.exists():
46
+ return None
47
+ try:
48
+ text = pyproject.read_text(encoding="utf-8")
49
+ except (OSError, UnicodeDecodeError) as e:
50
+ raise _pyproject_unreadable_error(pyproject, str(e)) from e
51
+ try:
52
+ return tomllib.loads(text)
53
+ except tomllib.TOMLDecodeError as e:
54
+ raise malformed_pyproject(pyproject, str(e)) from e
55
+
56
+
57
+ def _load_package_json(path: Path) -> dict | None:
58
+ """Parse `path/package.json` or return None if absent / unreadable / malformed.
59
+
60
+ Soft-fails on any read/decode/parse error — Node tools handle missing or
61
+ bad manifests gracefully and we follow the same "fail-soft for optional
62
+ sources" pattern here.
63
+ """
64
+ package_json = path / "package.json"
65
+ if not package_json.exists():
66
+ return None
67
+ try:
68
+ return json.loads(package_json.read_text(encoding="utf-8"))
69
+ except (OSError, UnicodeDecodeError, json.JSONDecodeError):
70
+ return None
71
+
72
+
73
def _detect_compose(path: Path) -> str | None:
    """Return the first _COMPOSE_FILENAMES entry present at *path*, or None."""
    return next(
        (candidate for candidate in _COMPOSE_FILENAMES if (path / candidate).exists()),
        None,
    )
79
+
80
+
81
def _build_context(path: Path) -> _Context:
    """Walk *path* once and capture every signal the rule set needs."""
    # Manifests first (these may raise for an unreadable/malformed pyproject),
    # then the cheap filesystem probes in the same order as before.
    pyproject = _load_pyproject(path)
    package_json = _load_package_json(path)

    scripts_dir = path / "scripts"
    bash_scripts: list[Path] = []
    if scripts_dir.is_dir():
        bash_scripts = sorted(entry for entry in scripts_dir.iterdir() if entry.suffix == ".sh")

    has_dockerfile = (path / "Dockerfile").exists()
    compose = _detect_compose(path)
    has_culture_yaml = (path / "culture.yaml").exists()
    has_tests_dir = (path / "tests").is_dir()

    workflows_dir = path / ".github" / "workflows"
    workflow_files: list[Path] = []
    if workflows_dir.is_dir():
        workflow_files = sorted(
            entry for entry in workflows_dir.iterdir() if entry.suffix in (".yml", ".yaml")
        )

    return _Context(
        path=path,
        pyproject=pyproject,
        package_json=package_json,
        bash_scripts=bash_scripts,
        has_dockerfile=has_dockerfile,
        has_compose=compose is not None,
        compose_filename=compose,
        has_tests_dir=has_tests_dir,
        workflow_files=workflow_files,
        has_culture_yaml=has_culture_yaml,
    )
107
+
108
+
109
+ def _rule_python(ctx: _Context) -> dict[str, str] | None:
110
+ if ctx.pyproject is None:
111
+ return None
112
+ return {"name": "python", "evidence": "pyproject.toml present"}
113
+
114
+
115
+ def _rule_node(ctx: _Context) -> dict[str, str] | None:
116
+ if ctx.package_json is None:
117
+ return None
118
+ return {"name": "node", "evidence": "package.json present"}
119
+
120
+
121
+ def _rule_bash(ctx: _Context) -> dict[str, str] | None:
122
+ if ctx.pyproject is not None or ctx.package_json is not None:
123
+ return None
124
+ if not ctx.bash_scripts:
125
+ return None
126
+ n = len(ctx.bash_scripts)
127
+ file_word = "file" if n == 1 else "files"
128
+ return {
129
+ "name": "bash",
130
+ "evidence": f"scripts/ contains {n} .sh {file_word}; no Python/Node manifest",
131
+ }
132
+
133
+
134
+ def _rule_cli(ctx: _Context) -> dict[str, str] | None:
135
+ # Python: [project.scripts] non-empty.
136
+ if ctx.pyproject is not None:
137
+ scripts = (ctx.pyproject.get("project", {}) or {}).get("scripts", {}) or {}
138
+ if scripts:
139
+ entries = ", ".join(f'{k} = "{v}"' for k, v in scripts.items())
140
+ return {"name": "cli", "evidence": f"[project.scripts] defines {entries}"}
141
+ # Node: package.json `bin` non-empty (object or string).
142
+ if ctx.package_json is not None:
143
+ bin_field = ctx.package_json.get("bin")
144
+ if bin_field:
145
+ if isinstance(bin_field, dict):
146
+ names = ", ".join(bin_field.keys())
147
+ else:
148
+ names = ctx.package_json.get("name", "<unnamed>")
149
+ return {"name": "cli", "evidence": f"package.json bin defines {names}"}
150
+ return None
151
+
152
+
153
+ def _rule_library(ctx: _Context) -> dict[str, str] | None:
154
+ """Importable Python package: `<name>/__init__.py` or `src/<name>/__init__.py`."""
155
+ if ctx.pyproject is None:
156
+ return None
157
+ name = (ctx.pyproject.get("project", {}) or {}).get("name")
158
+ if not name:
159
+ return None
160
+ # PyPI normalises hyphen vs underscore; check both possible package dir names.
161
+ candidates = [name, name.replace("-", "_")]
162
+ for candidate in candidates:
163
+ flat = ctx.path / candidate / "__init__.py"
164
+ if flat.exists():
165
+ return {"name": "library", "evidence": f"`{candidate}/__init__.py` present"}
166
+ nested = ctx.path / "src" / candidate / "__init__.py"
167
+ if nested.exists():
168
+ return {"name": "library", "evidence": f"`src/{candidate}/__init__.py` present"}
169
+ return None
170
+
171
+
172
+ def _rule_dockerized(ctx: _Context) -> dict[str, str] | None:
173
+ if ctx.has_dockerfile:
174
+ return {"name": "dockerized", "evidence": "Dockerfile present"}
175
+ if ctx.has_compose and ctx.compose_filename:
176
+ return {"name": "dockerized", "evidence": f"{ctx.compose_filename} present"}
177
+ return None
178
+
179
+
180
+ def _rule_tested(ctx: _Context) -> dict[str, str] | None:
181
+ if not ctx.has_tests_dir:
182
+ return None
183
+ # Python path: pytest in [dependency-groups] dev
184
+ if ctx.pyproject is not None:
185
+ dev_deps = (ctx.pyproject.get("dependency-groups", {}) or {}).get("dev", []) or []
186
+ # Strip version spec: pytest>=8.0 -> pytest; pytest==8.1 -> pytest; etc.
187
+ dep_names = {d.split(">=")[0].split("==")[0].split("~=")[0].strip() for d in dev_deps}
188
+ if "pytest" in dep_names:
189
+ return {
190
+ "name": "tested",
191
+ "evidence": "tests/ exists; pytest in dependency-groups.dev",
192
+ }
193
+ # Node path: scripts.test defined
194
+ if ctx.package_json is not None:
195
+ scripts = ctx.package_json.get("scripts", {}) or {}
196
+ if scripts.get("test"):
197
+ return {
198
+ "name": "tested",
199
+ "evidence": f"tests/ exists; package.json scripts.test = {scripts['test']!r}",
200
+ }
201
+ return None
202
+
203
+
204
+ def _rule_packaged_pypi(ctx: _Context) -> dict[str, str] | None:
205
+ needles = ("pypi.org", "pypa/gh-action-pypi-publish")
206
+ for wf in ctx.workflow_files:
207
+ try:
208
+ text = wf.read_text(encoding="utf-8")
209
+ except (OSError, UnicodeDecodeError):
210
+ # Best-effort: skip unreadable / undecodable workflow files
211
+ # rather than aborting classification.
212
+ continue
213
+ if any(needle in text for needle in needles):
214
+ return {
215
+ "name": "packaged-pypi",
216
+ "evidence": f".github/workflows/{wf.name} uploads to pypi.org",
217
+ }
218
+ return None
219
+
220
+
221
+ def _rule_agentculture_sibling(ctx: _Context) -> dict[str, str] | None:
222
+ if ctx.has_culture_yaml:
223
+ return {"name": "agentculture-sibling", "evidence": "culture.yaml present"}
224
+ return None
225
+
226
+
227
# Rule registry: classify() applies these in order, so the tag list in its
# output is stable — language tags first, then capability tags. Each rule is
# a pure function _Context -> {"name", "evidence"} dict or None.
_RULES = [
    _rule_python,
    _rule_node,
    _rule_bash,
    _rule_cli,
    _rule_library,
    _rule_dockerized,
    _rule_tested,
    _rule_packaged_pypi,
    _rule_agentculture_sibling,
]
238
+
239
+
240
def _path_not_found_error(p: Path) -> SeerError:
    """User error: classify() was pointed at a path that does not exist."""
    details = {
        "code": EXIT_USER_ERROR,
        "kind": "user_error",
        "message": f"path not found: {p}",
        "reason": "classify expected a directory path that exists on disk.",
        "remediation": "check the path argument and retry.",
    }
    return SeerError(**details)
248
+
249
+
250
def _path_not_a_directory_error(p: Path) -> SeerError:
    """User error: classify() was pointed at a file instead of a directory."""
    details = {
        "code": EXIT_USER_ERROR,
        "kind": "user_error",
        "message": f"classify expects a directory, got file: {p}",
        "reason": "classify operates on a repository root, not a single file.",
        "remediation": "pass the parent directory.",
    }
    return SeerError(**details)
258
+
259
+
260
def _pyproject_unreadable_error(p: Path, detail: str) -> SeerError:
    """Environment error: pyproject.toml exists but could not be read."""
    details = {
        "code": EXIT_ENV_ERROR,
        "kind": "env_error",
        "message": f"cannot read pyproject.toml at {p}",
        "reason": f"OS or decode error while reading the manifest: {detail}",
        "remediation": "check file permissions and confirm the file is valid UTF-8.",
    }
    return SeerError(**details)
268
+
269
+
270
def classify(path: Path) -> dict[str, object]:
    """Return `{path, manifest, language, tags}` for the repo at *path*.

    Raises:
        SeerError(EXIT_USER_ERROR): *path* does not exist, or is a file
            rather than a directory.
        SeerError(EXIT_ENV_ERROR): pyproject.toml exists but is unreadable
            or malformed (propagated from the context build).
    """
    if not path.exists():
        raise _path_not_found_error(path)
    if not path.is_dir():
        raise _path_not_a_directory_error(path)

    ctx = _build_context(path)
    # Rules run in _RULES order; each contributes at most one tag.
    tags = [tag for rule in _RULES if (tag := rule(ctx)) is not None]

    # Manifest + language derivation. Python wins over Node when both are
    # present (see spec — polyglot callers should read the tag list, not
    # the scalar fields).
    manifest: str | None
    if ctx.pyproject is not None:
        manifest, language = "pyproject.toml", "python"
    elif ctx.package_json is not None:
        manifest, language = "package.json", "node"
    else:
        manifest, language = None, "unknown"

    return {
        "path": str(path),
        "manifest": manifest,
        "language": language,
        "tags": tags,
    }
@@ -0,0 +1,160 @@
1
+ """seer.lookup.grep_context — ripgrep-backed search with AST scope annotation.
2
+
3
+ Provides:
4
+ grep_with_context — run ``rg --json`` and pair every match with the
5
+ enclosing Python scope from the AST resolver.
6
+ render_grep_markdown — format grep results as a Markdown table.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import ast
12
+ import json
13
+ import subprocess # noqa: S404 # nosec B404
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from seer.cli._errors import EXIT_ENV_ERROR, EXIT_USER_ERROR, SeerError
18
+ from seer.lookup.ast_scope import find_enclosing
19
+
20
+ __all__ = ["grep_with_context", "render_grep_markdown"]
21
+
22
+
23
def _run_rg(pattern: str, path: Path) -> "subprocess.CompletedProcess[str]":
    """Run ``rg --json`` over *path* and return the completed process.

    Exit code 0 means matches were found, 1 means none (both are success
    here); anything >= 2 is a real rg error.

    Raises:
        SeerError(EXIT_ENV_ERROR): rg is missing from PATH, the subprocess
            layer fails (including the 30s timeout), or rg exits with
            code 2+.
    """
    try:
        result = subprocess.run(  # noqa: S603,S607 # nosec B603 B607
            ["rg", "--json", pattern, str(path)],
            capture_output=True,
            text=True,
            check=False,
            timeout=30,
        )
    except FileNotFoundError as exc:
        # Chain the original exception so tracebacks show the root cause.
        raise SeerError(
            code=EXIT_ENV_ERROR,
            kind="env_error",
            message="`rg` not found on PATH",
            reason="seer grep requires ripgrep (rg) for match-finding.",
            remediation=("install ripgrep (e.g. `apt install ripgrep` or `brew install ripgrep`)."),
        ) from exc
    except subprocess.SubprocessError as exc:
        # Covers TimeoutExpired and other subprocess-layer failures. Now
        # chained and given reason/remediation for parity with the sibling
        # SeerErrors in this module.
        raise SeerError(
            code=EXIT_ENV_ERROR,
            kind="env_error",
            message=f"rg subprocess failed: {exc}",
            reason="the rg subprocess could not run to completion.",
            remediation="retry; narrow the search path if the 30s timeout was hit.",
        ) from exc

    # rg exits 1 when there are no matches — that is not an error.
    if result.returncode >= 2:
        stderr_snippet = result.stderr.strip()[:200]
        raise SeerError(
            code=EXIT_ENV_ERROR,
            kind="env_error",
            message=f"rg exited with code {result.returncode}",
            reason=stderr_snippet or "rg reported an error.",
            remediation="check that the pattern is a valid regex and the path is readable.",
        )

    return result
60
+
61
+
62
+ def _parse_rg_matches(stdout: str) -> "dict[str, list[dict[str, Any]]]":
63
+ """Group ``rg --json`` match events by file, preserving emission order."""
64
+ matches_by_file: dict[str, list[dict[str, Any]]] = {}
65
+ for raw_line in stdout.splitlines():
66
+ raw_line = raw_line.strip()
67
+ if not raw_line:
68
+ continue
69
+ try:
70
+ event = json.loads(raw_line)
71
+ except json.JSONDecodeError:
72
+ continue
73
+ if event.get("type") != "match":
74
+ continue
75
+ data = event.get("data", {})
76
+ file_str = (data.get("path") or {}).get("text", "")
77
+ line_num = data.get("line_number", 0)
78
+ line_text = ((data.get("lines") or {}).get("text") or "").rstrip("\n")
79
+ entry: dict[str, Any] = {
80
+ "file": file_str,
81
+ "line": line_num,
82
+ "scope": None, # filled in by _annotate_scopes
83
+ "text": line_text,
84
+ }
85
+ matches_by_file.setdefault(file_str, []).append(entry)
86
+ return matches_by_file
87
+
88
+
89
def _annotate_scopes(file_str: str, entries: "list[dict[str, Any]]") -> None:
    """Fill in the enclosing Python scope for every entry (mutates in place).

    Non-Python files are left untouched. Unreadable or unparseable Python
    files keep ``scope=None`` for all their matches — best effort only.
    """
    if not file_str.endswith(".py"):
        return
    try:
        tree = ast.parse(Path(file_str).read_text(encoding="utf-8"))
    except (SyntaxError, OSError, UnicodeDecodeError):
        return
    for entry in entries:
        enclosing = find_enclosing(tree, entry["line"])
        entry["scope"] = None if enclosing is None else enclosing.name
102
+
103
+
104
def grep_with_context(pattern: str, path: str | Path) -> dict[str, Any]:
    """Search *path* for *pattern* via ``rg --json`` and annotate each match.

    Each match dict in the result has the shape::

        {"file": str, "line": int, "scope": str | None, "text": str}

    ``scope`` is the qualified name of the enclosing function / method /
    class for Python files; it stays ``None`` for module-level lines and
    for every non-Python file.

    Raises:
        SeerError(EXIT_USER_ERROR): *path* does not exist.
        SeerError(EXIT_ENV_ERROR): ``rg`` is not on PATH, or rg exits with
            code 2+ (real error, not "no matches").
    """
    target = Path(path)
    if not target.exists():
        raise SeerError(
            code=EXIT_USER_ERROR,
            kind="user_error",
            message=f"path not found: {path}",
            remediation="pass an existing file or directory.",
        )

    completed = _run_rg(pattern, target)
    grouped = _parse_rg_matches(completed.stdout)

    for file_name, entries in grouped.items():
        _annotate_scopes(file_name, entries)

    flat = [entry for bucket in grouped.values() for entry in bucket]
    return {"pattern": pattern, "matches": flat}
136
+
137
+
138
def render_grep_markdown(data: dict[str, Any]) -> str:
    """Render a :func:`grep_with_context` result dict as a Markdown table."""
    pattern = data.get("pattern", "")
    matches = data.get("matches") or []

    header = [f"# grep: `{pattern}`", ""]
    if not matches:
        return "\n".join(header + ["_No matches found._"]) + "\n"

    rows = ["| File | Line | Scope | Text |", "|---|---|---|---|"]
    for match in matches:
        cells = (
            match.get("file", ""),
            str(match.get("line", "")),
            # Module-level matches (scope=None) render as a placeholder.
            match.get("scope") or "_module_",
            # Escape pipes so match text cannot break the table row.
            (match.get("text") or "").replace("|", "\\|"),
        )
        rows.append("| " + " | ".join(cells) + " |")

    return "\n".join(header + rows) + "\n"