loom-code 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- loom_code/__init__.py +22 -0
- loom_code/_post_commit.py +119 -0
- loom_code/agent.py +544 -0
- loom_code/approval.py +616 -0
- loom_code/browse/__init__.py +291 -0
- loom_code/browse/act.py +467 -0
- loom_code/browse/observe.py +249 -0
- loom_code/browse/session.py +96 -0
- loom_code/browse/verify.py +194 -0
- loom_code/checkpoint.py +283 -0
- loom_code/cli.py +495 -0
- loom_code/code_index.py +703 -0
- loom_code/compact.py +143 -0
- loom_code/consent.py +47 -0
- loom_code/credentials.py +527 -0
- loom_code/edit_tool.py +635 -0
- loom_code/extensions.py +522 -0
- loom_code/file_history.py +322 -0
- loom_code/file_tools.py +93 -0
- loom_code/git_hook.py +200 -0
- loom_code/grep_tool.py +430 -0
- loom_code/hooks.py +297 -0
- loom_code/loominit/__init__.py +23 -0
- loom_code/loominit/_ast_walk.py +429 -0
- loom_code/loominit/_files.py +284 -0
- loom_code/loominit/_graph.py +141 -0
- loom_code/loominit/_resolve.py +392 -0
- loom_code/loominit/_tests_map.py +108 -0
- loom_code/loominit/extractor.py +332 -0
- loom_code/loominit/repomap.py +225 -0
- loom_code/loominit/schema.py +242 -0
- loom_code/lsp_tools.py +396 -0
- loom_code/mcp_host.py +79 -0
- loom_code/operator.py +449 -0
- loom_code/paste.py +97 -0
- loom_code/paths.py +52 -0
- loom_code/permissions.py +177 -0
- loom_code/project.py +104 -0
- loom_code/prompts.py +451 -0
- loom_code/render.py +783 -0
- loom_code/repl.py +4080 -0
- loom_code/rules.py +267 -0
- loom_code/sandboxed_bash.py +176 -0
- loom_code/scribe.py +88 -0
- loom_code/skills/__init__.py +16 -0
- loom_code/skills/graphify/SKILL.md +97 -0
- loom_code/skills/graphify/tools.py +570 -0
- loom_code/trust.py +216 -0
- loom_code/turn.py +169 -0
- loom_code/web_fetch.py +370 -0
- loom_code/workers.py +758 -0
- loom_code/worktree.py +134 -0
- loom_code-0.1.1.dist-info/METADATA +224 -0
- loom_code-0.1.1.dist-info/RECORD +58 -0
- loom_code-0.1.1.dist-info/WHEEL +5 -0
- loom_code-0.1.1.dist-info/entry_points.txt +2 -0
- loom_code-0.1.1.dist-info/licenses/LICENSE +21 -0
- loom_code-0.1.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
"""Cross-file resolution — imports, API surface, entry points.
|
|
2
|
+
|
|
3
|
+
Three things only resolvable once we have the full file set:
|
|
4
|
+
|
|
5
|
+
1. **Import resolution.** ``from foo.bar import x`` is a dotted
|
|
6
|
+
module name in :mod:`_ast_walk`'s output; here we map it to a
|
|
7
|
+
file path within the repo (or mark unresolved for third-party /
|
|
8
|
+
stdlib imports). Relative imports (``from .. import x``) need
|
|
9
|
+
the importing file's package depth to resolve.
|
|
10
|
+
|
|
11
|
+
2. **API surface.** A symbol is in the "API surface" when reachable
|
|
12
|
+
from a package's ``__init__.py`` — either re-exported by a
|
|
13
|
+
``from .module import X`` line or named in ``__all__``. Agents
|
|
14
|
+
over-read internals; flagging API symbols lets the annotator
|
|
15
|
+
default to the public surface.
|
|
16
|
+
|
|
17
|
+
3. **Entry points.** ``pyproject.toml [project.scripts]`` gives us
|
|
18
|
+
``loom-code = "loom_code.cli:main"``-style entries directly. We
|
|
19
|
+
also surface ``if __name__ == "__main__":`` blocks (mined from
|
|
20
|
+
:mod:`_ast_walk`'s symbol output via a second AST pass) and any
|
|
21
|
+
landmark decorators (``@click.command``, etc.) collected during
|
|
22
|
+
the walk.
|
|
23
|
+
|
|
24
|
+
All three live here because they share machinery — they all need
|
|
25
|
+
to map dotted module names to repo-relative file paths.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import ast
|
|
31
|
+
import tomllib
|
|
32
|
+
from dataclasses import dataclass
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
|
|
35
|
+
from ._ast_walk import _RawDecorator
|
|
36
|
+
from ._files import DiscoveredFile
|
|
37
|
+
from .schema import EntryPoint, ImportEdge
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class _ModuleIndex:
    """Maps dotted module names to repo-relative file paths. Built
    once from the file list, queried by every resolution step.

    Two kinds of mapping:

    * ``modules[dotted] = rel_path`` — e.g.
      ``"loom_code.cli" -> "loom_code/cli.py"``
    * ``packages[dotted] = init_rel_path`` — e.g.
      ``"loom_code" -> "loom_code/__init__.py"``

    Both maps are populated so a dotted name can resolve either to a
    plain module file or to a package's ``__init__.py``. The resolver
    (:func:`resolve_import`) consults ``modules`` first and
    ``packages`` second, so ``loom_code.cli`` resolves to the module
    file while ``loom_code`` resolves to the package's
    ``__init__.py``.
    """

    # Dotted module name -> repo-relative path of the module file.
    modules: dict[str, str]
    # Dotted package name -> repo-relative path of its ``__init__.py``.
    packages: dict[str, str]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def build_module_index(files: list[DiscoveredFile]) -> _ModuleIndex:
    """Index every Python file by the dotted name it is imported under.

    ``foo/bar/baz.py``      → ``modules["foo.bar.baz"]``
    ``foo/bar/__init__.py`` → ``packages["foo.bar"]``

    Files whose ``lang`` is not ``"python"`` are ignored. Namespace
    packages (PEP 420) are not detected: they have no ``__init__.py``
    and telling them apart would need a ``sys.path`` walk, so only
    directories with an explicit ``__init__.py`` become packages.
    """
    module_paths: dict[str, str] = {}
    package_paths: dict[str, str] = {}
    for entry in (f for f in files if f.lang == "python"):
        # Split the path into its directory components and the file name.
        *dirs, leaf = entry.rel_path.split("/")
        if leaf == "__init__.py":
            package_paths[".".join(dirs)] = entry.rel_path
            continue
        # Drop the source (or stub) extension to get the module's own name.
        stem = leaf.removesuffix(".py").removesuffix(".pyi")
        module_paths[".".join([*dirs, stem])] = entry.rel_path
    return _ModuleIndex(modules=module_paths, packages=package_paths)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def resolve_import(
    *,
    from_file: str,
    to_module: str,
    level: int,
    module_index: _ModuleIndex,
) -> str | None:
    """Turn a raw import into a repo-relative file path.

    Args:
        from_file: Repo-relative path of the importing file.
        to_module: Dotted module name as written in the import (may be
            empty, e.g. for ``from . import x``).
        level: Number of leading dots; ``0`` means an absolute import.
        module_index: Dotted-name lookup tables for the repo.

    Returns:
        The target file's rel_path on success, or ``None`` when the
        import cannot be mapped to an indexed file (third-party /
        stdlib / typo, or a relative import that climbs above the
        top-level package).
    """
    target = _resolve_dotted(from_file, to_module, level)
    if target is None:
        return None

    # Candidates in priority order. First the dotted name itself, then
    # its parent: the last component may be a symbol inside the parent
    # module rather than a module of its own, and the edge should then
    # point at the file that defines it.
    candidates = [target]
    parent = target.rpartition(".")[0]
    # An undotted target has no parent. Looking up "" would wrongly hit
    # a repo-root ``__init__.py`` (indexed under the empty package name)
    # and mark every unknown top-level import (``os``, ``numpy``) as
    # resolved, so the fallback is skipped in that case.
    if parent:
        candidates.append(parent)

    for dotted in candidates:
        # Module first (``foo.bar`` → ``foo/bar.py``), then package
        # (``foo.bar`` → ``foo/bar/__init__.py``).
        if dotted in module_index.modules:
            return module_index.modules[dotted]
        if dotted in module_index.packages:
            return module_index.packages[dotted]
    return None


def _resolve_dotted(
    from_file: str, to_module: str, level: int
) -> str | None:
    """Apply Python's relative-import rules to produce an absolute
    dotted module path.

    ``level=0`` → ``to_module`` is already absolute.
    ``level=1`` → ``to_module`` is relative to ``from_file``'s
                  package.
    ``level=2`` → relative to the parent package, etc.

    Returns ``None`` if the level overshoots (``from .. import x``
    in a top-level package) or if an absolute import has an empty
    module name.
    """
    if level == 0:
        return to_module or None
    # ``from_file = "a/b/c.py"`` → package parts = ["a", "b"]
    pkg_parts = from_file.split("/")[:-1]
    if len(pkg_parts) < level:
        return None
    # One dot stays in the current package; each extra dot drops one
    # trailing package component.
    base = pkg_parts[: len(pkg_parts) - level + 1]
    if to_module:
        base = [*base, *to_module.split(".")]
    return ".".join(base) if base else None
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def resolve_imports(
    raw_imports_by_file: dict[str, list[tuple[str, int, int]]],
    module_index: _ModuleIndex,
) -> list[ImportEdge]:
    """Convert the raw per-file import tuples into :class:`ImportEdge`
    records, one edge per import, in input order.

    Input shape: ``{from_path: [(to_module, line, level), ...]}``.
    Plain tuples are accepted (rather than raw-import objects) so the
    caller decides how per-file results are aggregated.

    Every import yields an edge, whether or not it resolves to a file
    in ``module_index``; ``resolved`` records the outcome.
    """
    edges: list[ImportEdge] = []
    for from_path, items in raw_imports_by_file.items():
        for to_module, line, level in items:
            is_resolved = (
                resolve_import(
                    from_file=from_path,
                    to_module=to_module,
                    level=level,
                    module_index=module_index,
                )
                is not None
            )
            # The edge keeps the import in its source form: absolute
            # names verbatim, relative ones with their leading dots.
            if level == 0:
                shown = to_module
            else:
                shown = _relative_display(level, to_module)
            edge = ImportEdge(
                from_path=from_path,
                to_module=shown,
                line=line,
                resolved=is_resolved,
            )
            edges.append(edge)
    return edges
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _relative_display(level: int, to_module: str) -> str:
|
|
192
|
+
"""Render ``from .. import x`` style imports in a stable text
|
|
193
|
+
form for the schema. ``level=2, to_module="x"`` → ``"..x"``."""
|
|
194
|
+
return ("." * level) + to_module
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
# API surface
|
|
199
|
+
# ---------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def detect_api_surface(
    files: list[DiscoveredFile], module_index: _ModuleIndex
) -> set[str]:
    """Return the set of ``rel_path``s reachable from any
    ``__init__.py``.

    Heuristic:

    * Every file an ``__init__.py`` does ``from .module import X``
      against → API surface.
    * Every entry in an ``__all__`` literal of an ``__init__.py``
      (plain or annotated assignment) whose target resolves to a
      file → API surface.

    Only top-level statements of each ``__init__.py`` are inspected,
    and the dependency graph is NOT chased past the first hop — being
    "imported by something on the API surface" is private-by-default.

    An ``__init__.py`` that cannot be read, decoded as UTF-8, or
    parsed is skipped rather than aborting the whole scan.
    """
    api: set[str] = set()
    for f in files:
        # Compare the final path component so that files which merely
        # end with the same characters (``not__init__.py``) are not
        # mistaken for package roots.
        if f.rel_path.rsplit("/", 1)[-1] != "__init__.py":
            continue
        try:
            tree = ast.parse(f.abs_path.read_text(encoding="utf-8"))
        except (SyntaxError, ValueError, OSError):
            # ValueError covers UnicodeDecodeError (file is not UTF-8);
            # some Python versions also raise it for NUL bytes in source.
            continue
        for node in tree.body:
            if isinstance(node, ast.ImportFrom):
                resolved = resolve_import(
                    from_file=f.rel_path,
                    to_module=node.module or "",
                    level=node.level or 0,
                    module_index=module_index,
                )
                if resolved is not None:
                    api.add(resolved)
                continue

            # Find ``__all__ = ["a", "b"]`` or the annotated form
            # ``__all__: list[str] = [...]`` — each name is resolved as
            # a sibling module of this __init__.py.
            all_value: ast.expr | None = None
            if isinstance(node, ast.Assign):
                if any(
                    isinstance(t, ast.Name) and t.id == "__all__"
                    for t in node.targets
                ):
                    all_value = node.value
            elif isinstance(node, ast.AnnAssign):
                if (
                    isinstance(node.target, ast.Name)
                    and node.target.id == "__all__"
                ):
                    # ``value`` is None for a bare annotation.
                    all_value = node.value
            if all_value is not None:
                api.update(
                    _resolve_all_entries(all_value, f.rel_path, module_index)
                )
    return api
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _resolve_all_entries(
|
|
253
|
+
value: ast.expr, init_path: str, module_index: _ModuleIndex
|
|
254
|
+
) -> set[str]:
|
|
255
|
+
"""``__all__ = [...]`` — pull out string literals and try to
|
|
256
|
+
resolve each as a sibling module of ``init_path``. Anything that
|
|
257
|
+
doesn't resolve gets quietly skipped (the entry might be a
|
|
258
|
+
re-export symbol, not a sub-module — that's still API surface
|
|
259
|
+
but covered by the ImportFrom pass)."""
|
|
260
|
+
paths: set[str] = set()
|
|
261
|
+
if not isinstance(value, ast.List | ast.Tuple):
|
|
262
|
+
return paths
|
|
263
|
+
init_pkg = ".".join(init_path.split("/")[:-1])
|
|
264
|
+
for elt in value.elts:
|
|
265
|
+
if isinstance(elt, ast.Constant) and isinstance(elt.value, str):
|
|
266
|
+
dotted = f"{init_pkg}.{elt.value}" if init_pkg else elt.value
|
|
267
|
+
if dotted in module_index.modules:
|
|
268
|
+
paths.add(module_index.modules[dotted])
|
|
269
|
+
elif dotted in module_index.packages:
|
|
270
|
+
paths.add(module_index.packages[dotted])
|
|
271
|
+
return paths
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
# ---------------------------------------------------------------------------
|
|
275
|
+
# Entry points
|
|
276
|
+
# ---------------------------------------------------------------------------
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def extract_entry_points(
|
|
280
|
+
*,
|
|
281
|
+
repo_root: Path,
|
|
282
|
+
files: list[DiscoveredFile],
|
|
283
|
+
decorators: list[_RawDecorator],
|
|
284
|
+
decorator_path_lookup: dict[_RawDecorator, str],
|
|
285
|
+
) -> list[EntryPoint]:
|
|
286
|
+
"""Mine entry points from three sources.
|
|
287
|
+
|
|
288
|
+
``decorator_path_lookup`` maps every ``_RawDecorator`` back to
|
|
289
|
+
the file it came from — the extractor builds it during its
|
|
290
|
+
aggregation pass (decorators don't carry the source file inside
|
|
291
|
+
the dataclass because :mod:`_ast_walk` is per-file already).
|
|
292
|
+
"""
|
|
293
|
+
out: list[EntryPoint] = []
|
|
294
|
+
|
|
295
|
+
# 1. pyproject.toml [project.scripts]
|
|
296
|
+
pyproject = repo_root / "pyproject.toml"
|
|
297
|
+
if pyproject.exists():
|
|
298
|
+
try:
|
|
299
|
+
data = tomllib.loads(pyproject.read_text(encoding="utf-8"))
|
|
300
|
+
except (tomllib.TOMLDecodeError, OSError):
|
|
301
|
+
data = {}
|
|
302
|
+
scripts = (
|
|
303
|
+
data.get("project", {}).get("scripts", {})
|
|
304
|
+
if isinstance(data.get("project"), dict)
|
|
305
|
+
else {}
|
|
306
|
+
)
|
|
307
|
+
for name, target in scripts.items():
|
|
308
|
+
if not isinstance(target, str):
|
|
309
|
+
continue
|
|
310
|
+
out.append(
|
|
311
|
+
EntryPoint(
|
|
312
|
+
kind="pyproject_script",
|
|
313
|
+
name=name,
|
|
314
|
+
path="pyproject.toml",
|
|
315
|
+
line=None,
|
|
316
|
+
callable_id=_target_to_symbol_id(target, files),
|
|
317
|
+
)
|
|
318
|
+
)
|
|
319
|
+
|
|
320
|
+
# 2. ``if __name__ == "__main__":`` blocks
|
|
321
|
+
for f in files:
|
|
322
|
+
if f.lang != "python":
|
|
323
|
+
continue
|
|
324
|
+
try:
|
|
325
|
+
tree = ast.parse(f.abs_path.read_text(encoding="utf-8"))
|
|
326
|
+
except (SyntaxError, OSError):
|
|
327
|
+
continue
|
|
328
|
+
for node in tree.body:
|
|
329
|
+
if _is_main_block(node):
|
|
330
|
+
out.append(
|
|
331
|
+
EntryPoint(
|
|
332
|
+
kind="main_block",
|
|
333
|
+
name=f.rel_path,
|
|
334
|
+
path=f.rel_path,
|
|
335
|
+
line=node.lineno,
|
|
336
|
+
callable_id=None,
|
|
337
|
+
)
|
|
338
|
+
)
|
|
339
|
+
break # one main block per file is enough
|
|
340
|
+
|
|
341
|
+
# 3. Landmark decorators
|
|
342
|
+
for dec in decorators:
|
|
343
|
+
path = decorator_path_lookup.get(dec)
|
|
344
|
+
if path is None:
|
|
345
|
+
continue
|
|
346
|
+
out.append(
|
|
347
|
+
EntryPoint(
|
|
348
|
+
kind="decorated",
|
|
349
|
+
name=dec.decorator,
|
|
350
|
+
path=path,
|
|
351
|
+
line=dec.line,
|
|
352
|
+
callable_id=f"{path}:{dec.target_qualname}",
|
|
353
|
+
)
|
|
354
|
+
)
|
|
355
|
+
return out
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _is_main_block(node: ast.stmt) -> bool:
|
|
359
|
+
"""``if __name__ == "__main__":`` detection — exact AST shape
|
|
360
|
+
match (no fuzziness; if the user wrote it weirdly that's on
|
|
361
|
+
them)."""
|
|
362
|
+
if not isinstance(node, ast.If):
|
|
363
|
+
return False
|
|
364
|
+
test = node.test
|
|
365
|
+
if not isinstance(test, ast.Compare):
|
|
366
|
+
return False
|
|
367
|
+
if not (
|
|
368
|
+
isinstance(test.left, ast.Name) and test.left.id == "__name__"
|
|
369
|
+
):
|
|
370
|
+
return False
|
|
371
|
+
if len(test.comparators) != 1 or len(test.ops) != 1:
|
|
372
|
+
return False
|
|
373
|
+
if not isinstance(test.ops[0], ast.Eq):
|
|
374
|
+
return False
|
|
375
|
+
rhs = test.comparators[0]
|
|
376
|
+
return isinstance(rhs, ast.Constant) and rhs.value == "__main__"
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _target_to_symbol_id(
|
|
380
|
+
target: str, files: list[DiscoveredFile]
|
|
381
|
+
) -> str | None:
|
|
382
|
+
"""``"loom_code.cli:main"`` → ``"loom_code/cli.py:main"`` when
|
|
383
|
+
the module file exists. Returns ``None`` otherwise (annotator
|
|
384
|
+
falls back to the literal string in LOOM.md)."""
|
|
385
|
+
if ":" not in target:
|
|
386
|
+
return None
|
|
387
|
+
module_dotted, _, callable_name = target.partition(":")
|
|
388
|
+
rel = module_dotted.replace(".", "/") + ".py"
|
|
389
|
+
for f in files:
|
|
390
|
+
if f.rel_path == rel:
|
|
391
|
+
return f"{rel}:{callable_name}"
|
|
392
|
+
return None
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Map test files to the symbols they exercise.
|
|
2
|
+
|
|
3
|
+
For each non-test public symbol, scan every test file for exact-
|
|
4
|
+
match references to its bare name (``Agent`` not ``loomflow.Agent``).
|
|
5
|
+
Bare-name match works because the test will have either imported
|
|
6
|
+
the symbol (so it appears as a bare identifier in the body) or
|
|
7
|
+
called it via an attribute chain (where the bare name still
|
|
8
|
+
appears).
|
|
9
|
+
|
|
10
|
+
False positives are tolerated — a test that mentions ``Agent`` in
|
|
11
|
+
a comment but doesn't really exercise it is a minor inaccuracy
|
|
12
|
+
the agent can verify in seconds. False negatives (missing edges)
|
|
13
|
+
are also tolerated; the agent can still grep manually.
|
|
14
|
+
|
|
15
|
+
Cost: O(test_file_bytes × n_symbols) in the worst case. We bound
|
|
16
|
+
by building a single regex with alternation over all symbol names
|
|
17
|
+
and running it once per test file — linear in the test corpus.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
from collections.abc import Iterable
|
|
24
|
+
|
|
25
|
+
from ._files import DiscoveredFile
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def build_test_map(
|
|
29
|
+
*,
|
|
30
|
+
files: list[DiscoveredFile],
|
|
31
|
+
symbol_names: Iterable[str],
|
|
32
|
+
) -> dict[str, list[str]]:
|
|
33
|
+
"""Return ``{symbol_name: ["test_file:line", ...]}``.
|
|
34
|
+
|
|
35
|
+
``symbol_names`` is the bare-name set (NOT qualified). When two
|
|
36
|
+
symbols share a name (``foo`` in two modules), both attribution
|
|
37
|
+
to the same test is the honest answer — we can't disambiguate
|
|
38
|
+
without execution.
|
|
39
|
+
"""
|
|
40
|
+
names = sorted({n for n in symbol_names if _is_match_candidate(n)})
|
|
41
|
+
if not names:
|
|
42
|
+
return {}
|
|
43
|
+
# One regex per scan — Python's re engine is fast and alternation
|
|
44
|
+
# over a few hundred names is fine. Word-boundary anchored so
|
|
45
|
+
# ``Agent`` doesn't match ``AgentManager``.
|
|
46
|
+
pattern = re.compile(
|
|
47
|
+
r"\b(" + "|".join(re.escape(n) for n in names) + r")\b"
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
out: dict[str, list[str]] = {n: [] for n in names}
|
|
51
|
+
for f in files:
|
|
52
|
+
if not f.is_test or f.lang != "python":
|
|
53
|
+
continue
|
|
54
|
+
try:
|
|
55
|
+
text = f.abs_path.read_text(encoding="utf-8")
|
|
56
|
+
except OSError:
|
|
57
|
+
continue
|
|
58
|
+
# Track first-line hit per name in this file — we want a
|
|
59
|
+
# citation, not every occurrence.
|
|
60
|
+
seen_in_file: dict[str, int] = {}
|
|
61
|
+
for i, line in enumerate(text.splitlines(), start=1):
|
|
62
|
+
for match in pattern.finditer(line):
|
|
63
|
+
name = match.group(1)
|
|
64
|
+
if name not in seen_in_file:
|
|
65
|
+
seen_in_file[name] = i
|
|
66
|
+
for name, line_no in seen_in_file.items():
|
|
67
|
+
out[name].append(f"{f.rel_path}:{line_no}")
|
|
68
|
+
return out
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _is_match_candidate(name: str) -> bool:
|
|
72
|
+
"""Skip names too generic / too short to bare-name-match without
|
|
73
|
+
a flood of false positives.
|
|
74
|
+
|
|
75
|
+
Heuristic: identifier must be at least 4 characters, and must
|
|
76
|
+
not be a Python keyword or builtin. We don't have the full
|
|
77
|
+
builtin list inline — the length filter alone catches the worst
|
|
78
|
+
offenders (``run``, ``main``, ``new``, ``foo``)."""
|
|
79
|
+
if len(name) < 4:
|
|
80
|
+
return False
|
|
81
|
+
if name in _IGNORED_NAMES:
|
|
82
|
+
return False
|
|
83
|
+
return True
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# Common method/function names that are present in basically every
|
|
87
|
+
# codebase. Bare-match for these would be noise. Filter ruthlessly
|
|
88
|
+
# — false-negative is "agent can still grep" while false-positive
|
|
89
|
+
# is "agent reads N false-positive tests".
|
|
90
|
+
_IGNORED_NAMES: frozenset[str] = frozenset(
|
|
91
|
+
{
|
|
92
|
+
"main",
|
|
93
|
+
"run",
|
|
94
|
+
"test",
|
|
95
|
+
"tests",
|
|
96
|
+
"setup",
|
|
97
|
+
"teardown",
|
|
98
|
+
"Setup",
|
|
99
|
+
"Teardown",
|
|
100
|
+
"name",
|
|
101
|
+
"data",
|
|
102
|
+
"value",
|
|
103
|
+
"item",
|
|
104
|
+
"items",
|
|
105
|
+
"load",
|
|
106
|
+
"save",
|
|
107
|
+
}
|
|
108
|
+
)
|