codemap-core 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. codemap/__init__.py +7 -0
  2. codemap/cli/__init__.py +3 -0
  3. codemap/cli/_common.py +90 -0
  4. codemap/cli/commands/__init__.py +3 -0
  5. codemap/cli/commands/callees.py +102 -0
  6. codemap/cli/commands/callers.py +107 -0
  7. codemap/cli/commands/config.py +78 -0
  8. codemap/cli/commands/diagnostics.py +142 -0
  9. codemap/cli/commands/doctor.py +158 -0
  10. codemap/cli/commands/get.py +93 -0
  11. codemap/cli/commands/index.py +725 -0
  12. codemap/cli/commands/routes.py +104 -0
  13. codemap/cli/commands/search.py +78 -0
  14. codemap/cli/commands/trace.py +179 -0
  15. codemap/cli/main.py +140 -0
  16. codemap/cli/renderers/__init__.py +3 -0
  17. codemap/cli/renderers/json.py +32 -0
  18. codemap/cli/renderers/text.py +24 -0
  19. codemap/config/__init__.py +31 -0
  20. codemap/config/loader.py +96 -0
  21. codemap/config/schema.py +122 -0
  22. codemap/core/__init__.py +7 -0
  23. codemap/core/bridge/__init__.py +8 -0
  24. codemap/core/bridge/base.py +38 -0
  25. codemap/core/bridge/http_route.py +374 -0
  26. codemap/core/bridge/python_cross_module.py +120 -0
  27. codemap/core/bridge/registry.py +117 -0
  28. codemap/core/graph.py +183 -0
  29. codemap/core/models.py +299 -0
  30. codemap/core/store.py +78 -0
  31. codemap/core/symbol.py +314 -0
  32. codemap/diagnostics/__init__.py +3 -0
  33. codemap/diagnostics/exit_codes.py +30 -0
  34. codemap/diagnostics/logging.py +65 -0
  35. codemap/diagnostics/progress.py +68 -0
  36. codemap/indexers/__init__.py +9 -0
  37. codemap/indexers/_example_lang.py +135 -0
  38. codemap/indexers/base.py +77 -0
  39. codemap/indexers/python.py +577 -0
  40. codemap/indexers/registry.py +104 -0
  41. codemap/io/__init__.py +8 -0
  42. codemap/io/atomic.py +97 -0
  43. codemap/io/base.py +12 -0
  44. codemap/io/json_store.py +433 -0
  45. codemap/io/lock.py +87 -0
  46. codemap/io/manifest.py +90 -0
  47. codemap/mcp/__init__.py +3 -0
  48. codemap_core-0.1.0.dist-info/METADATA +480 -0
  49. codemap_core-0.1.0.dist-info/RECORD +52 -0
  50. codemap_core-0.1.0.dist-info/WHEEL +4 -0
  51. codemap_core-0.1.0.dist-info/entry_points.txt +10 -0
  52. codemap_core-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,77 @@
1
+ """Indexer Protocol — the contract every language / asset indexer implements.
2
+
3
+ All indexers, whether shipped in this repository or installed as third-party
4
+ plugins via the ``codemap.indexers`` entry-point group, register through the
5
+ same protocol with equal standing (ADR-004, ADR-L001).
6
+
7
+ Each indexer is responsible for one file at a time. Exceptions raised by a
8
+ single ``index_file`` call must be caught at the orchestration layer and
9
+ converted into ``Diagnostic`` entries — a single bad file must not abort an
10
+ entire indexing run (ADR-007).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass, field
16
+ from pathlib import Path, PurePosixPath
17
+ from typing import ClassVar, Protocol, runtime_checkable
18
+
19
+ from codemap.core.models import IndexResult
20
+
21
+ __all__ = ["IndexContext", "IndexResult", "Indexer"]
22
+
23
+
24
@dataclass(frozen=True, slots=True)
class IndexContext:
    """Per-run context passed to every ``Indexer.index_file`` invocation.

    Carries information that the indexer may need but should not assume from
    its environment: where the project root is, which file is being indexed
    relative to that root, what language the file was detected as, and a
    free-form ``config`` mapping. Instances are frozen, so fields cannot be
    reassigned after construction.
    """

    # Root directory of the project being indexed.
    project_root: Path
    # Path of the file relative to the project root, as a POSIX-style path.
    relative_path: PurePosixPath
    # Language tag the file was detected as.
    language: str
    # Free-form settings; every instance gets its own empty dict by default.
    config: dict[str, object] = field(default_factory=dict)
37
+
38
+
39
@runtime_checkable
class Indexer(Protocol):
    """The required interface for any indexer implementation.

    The protocol is structural: an implementation conforms by providing the
    four class attributes and the two methods declared here; it does not need
    to inherit from ``Indexer``.
    """

    # NOTE: the protocol declares non-method members, so runtime ``isinstance``
    # checks are supported but ``issubclass`` checks raise ``TypeError``.

    name: ClassVar[str]
    """Unique short identifier (lowercase, ASCII, no spaces)."""

    version: ClassVar[str]
    """Semantic version of the indexer itself (independent of CodeMap)."""

    file_patterns: ClassVar[list[str]]
    """Glob patterns the indexer claims (e.g. ``["*.py"]``). Used for
    fast dispatch; the orchestrator still calls :meth:`supports` to confirm."""

    languages: ClassVar[list[str]]
    """The set of language tags this indexer can emit. May contain custom
    asset names (e.g. ``"openapi"``); not restricted to programming languages."""

    def supports(self, path: Path) -> bool:
        """Return True if this indexer wants to handle ``path``.

        Called after ``file_patterns`` matches; lets the indexer reject files
        by content (e.g. a YAML file that is *not* an OpenAPI schema).

        Args:
            path: Location of the candidate file.

        Returns:
            ``True`` to claim the file, ``False`` to decline it.
        """
        ...

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse ``source`` and return symbols, edges, routes, diagnostics.

        ``path`` is the absolute on-disk location; ``ctx.relative_path`` is the
        path relative to the project root that should be stored on Symbol /
        Edge / Diagnostic ``.file`` fields for cross-machine portability.

        Args:
            path: Absolute on-disk location of the file.
            source: Raw, undecoded file content.
            ctx: Per-run context for this file.

        Returns:
            The ``IndexResult`` produced for this single file.
        """
        ...
@@ -0,0 +1,577 @@
1
+ """Python indexer — built on the stdlib ``ast`` module.
2
+
3
+ This is the first real-language indexer (Sprint N-1). It is shipped in the
4
+ main repository because (a) CodeMap is itself written in Python, so we get
5
+ dogfooding for free, and (b) ``ast`` is zero-dependency and trivially
6
+ correct. Per ADR-011, this is an *engineering* choice — Python carries no
7
+ product-level privilege over any other language that ships an indexer
8
+ through the ``codemap.indexers`` entry-point group (ADR-004).
9
+
10
+ Scheme: ``scip-python``. Symbol IDs encode the file path as a chain of
11
+ ``namespace`` descriptors and the in-file scope as nested namespace / type
12
+ descriptors with the leaf descriptor matching the symbol kind:
13
+
14
+ * module-level function ``foo`` in ``src/m.py``:
15
+ ``scip-python local . . src/m.py/foo().``
16
+ * class ``Bar`` in ``src/m.py``:
17
+ ``scip-python local . . src/m.py/Bar#``
18
+ * method ``baz`` on ``Bar``:
19
+ ``scip-python local . . src/m.py/Bar#baz().``
20
+ * module-level variable ``BAZ``:
21
+ ``scip-python local . . src/m.py/BAZ.``
22
+
23
+ Resolution policy (MVP):
24
+
25
+ * Calls to bare names (``foo()``) resolve to the same-file symbol if one
26
+ exists; otherwise the edge is kept and targets a ``local``-scheme
27
+ placeholder that a Bridge may resolve later. No diagnostic is emitted —
28
+ bare-name resolution failures are common (built-ins, dynamic globals)
29
+ and would flood diagnostics.
30
+ * Calls through ``self.x.y()`` are not type-inferred at MVP; they are kept as edges to a ``local``-scheme placeholder and no diagnostic is recorded.
31
+ * Class inheritance is recorded as ``extends`` edges; bases are kept as
32
+ the raw textual name when the parent cannot be resolved.
33
+ * ``import`` / ``from ... import ...`` produce ``imports`` edges to a
34
+ synthetic module symbol (kind=``module``).
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import ast
40
+ from enum import StrEnum
41
+ from pathlib import Path, PurePosixPath
42
+ from typing import ClassVar, Literal
43
+
44
+ from codemap.core.models import Diagnostic, Edge, IndexResult, Range, Symbol
45
+ from codemap.core.symbol import Descriptor, DescriptorKind, SymbolID
46
+ from codemap.indexers.base import IndexContext
47
+
48
# Scheme used for the SymbolIDs this indexer builds for Python symbols and
# modules (unresolved references use the separate ``local`` scheme).
SCHEME = "scip-python"
# Language tag stored on emitted symbols and advertised by ``PythonIndexer``.
LANG = "python"
50
+
51
+
52
class PythonIndexer:
    """Indexer for ``*.py`` / ``*.pyi`` files, built on the stdlib ``ast`` module.

    Decoding and parse failures are not raised; they are reported as a single
    error ``Diagnostic`` (``PY002`` for undecodable bytes, ``PY001`` for source
    the parser rejects) inside an otherwise empty ``IndexResult``.
    """

    name: ClassVar[str] = "python"
    version: ClassVar[str] = "0.1.0"
    file_patterns: ClassVar[list[str]] = ["*.py", "*.pyi"]
    languages: ClassVar[list[str]] = [LANG]

    def supports(self, path: Path) -> bool:
        """Return True when ``path`` has a ``.py`` or ``.pyi`` suffix."""
        return path.suffix in {".py", ".pyi"}

    def index_file(
        self,
        path: Path,
        source: bytes,
        ctx: IndexContext,
    ) -> IndexResult:
        """Parse ``source`` and collect its symbols, edges and diagnostics.

        Args:
            path: On-disk location of the file (not used by this indexer).
            source: Raw file content; must be UTF-8, optionally BOM-prefixed.
            ctx: Per-run context; ``ctx.relative_path`` is what gets stored on
                emitted records and used as the parser's file name.

        Returns:
            The visitor's symbols, edges and diagnostics, or a result holding
            one error diagnostic when the file cannot be decoded or parsed.
        """
        try:
            # ``utf-8-sig`` decodes exactly like ``utf-8`` but also strips a
            # leading BOM, which would otherwise reach the parser as a stray
            # U+FEFF character and be reported as a syntax error.
            text = source.decode("utf-8-sig")
        except UnicodeDecodeError as exc:
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="error",
                        file=ctx.relative_path,
                        code="PY002",
                        message=f"not valid UTF-8: {exc}",
                        producer=self.name,
                    )
                ]
            )
        try:
            tree = ast.parse(text, filename=str(ctx.relative_path))
        except (SyntaxError, ValueError) as exc:
            # ``ValueError`` covers source the compiler refuses outright (for
            # example NUL bytes on interpreters older than 3.12); it carries
            # neither ``lineno`` nor ``msg``, hence the ``getattr`` fallbacks.
            line = getattr(exc, "lineno", None) or 1
            detail = getattr(exc, "msg", None) or str(exc)
            return IndexResult(
                diagnostics=[
                    Diagnostic(
                        severity="error",
                        file=ctx.relative_path,
                        range=Range(start_line=line, end_line=line),
                        code="PY001",
                        message=f"syntax error: {detail}",
                        producer=self.name,
                    )
                ]
            )
        visitor = _Visitor(ctx.relative_path)
        visitor.visit(tree)
        return IndexResult(
            symbols=visitor.symbols,
            edges=visitor.edges,
            diagnostics=visitor.diagnostics,
        )
104
+
105
+
106
+ # ---------------------------------------------------------------------------
107
+ # Internals
108
+ # ---------------------------------------------------------------------------
109
+
110
+
111
class _Visitor(ast.NodeVisitor):
    """Single-pass AST visitor that builds symbols, edges, and diagnostics.

    Results accumulate on ``symbols``, ``edges`` and ``diagnostics``. A stack
    of ``_Scope`` entries tracks the enclosing classes and functions so that
    new SymbolIDs are nested under their classes and edges know which symbol
    they originate from.
    """

    def __init__(self, relative_path: PurePosixPath) -> None:
        self.relative_path = relative_path
        self.symbols: list[Symbol] = []
        self.edges: list[Edge] = []
        self.diagnostics: list[Diagnostic] = []

        # Enclosing class / function scopes, innermost last.
        self._scope: list[_Scope] = []
        # Local name -> dotted path that an import statement bound it to.
        self._imports: dict[str, str] = {}
        # Full dotted names of every module named in a plain ``import``
        # statement; lets ``_resolve_name`` tell submodules from attributes.
        self._imported_modules: set[str] = set()
        # Bare local name -> SymbolID, used for same-file call resolution.
        self._symbol_index: dict[str, SymbolID] = {}

    # ------------------------------------------------------------- modules

    def visit_Module(self, node: ast.Module) -> None:
        """Visit every top-level statement of the module."""
        self.generic_visit(node)

    # ------------------------------------------------------------ imports

    def visit_Import(self, node: ast.Import) -> None:
        """Record the names bound by ``import a.b [as c]`` and emit edges."""
        for alias in node.names:
            self._imported_modules.add(alias.name)
            if alias.asname:
                # ``import a.b as c`` binds ``c`` to the submodule itself.
                self._imports[alias.asname] = alias.name
            else:
                # ``import a.b`` binds only the top-level package ``a``; the
                # submodule is reached through attribute access on it.
                top_level = alias.name.split(".")[0]
                self._imports[top_level] = top_level
            self._add_import_edge(_module_symbol_id(alias.name), node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Record the names bound by ``from m import x [as y]`` and emit edges."""
        # NOTE(review): ``node.level`` is not consulted, so relative imports
        # are recorded as if they were absolute — confirm this is intended.
        module = node.module or ""
        for alias in node.names:
            local = alias.asname or alias.name
            dotted = f"{module}.{alias.name}" if module else alias.name
            self._imports[local] = dotted
            self._add_import_edge(_module_symbol_id(module or alias.name), node)

    def _add_import_edge(self, target: SymbolID, node: ast.stmt) -> None:
        """Append an ``imports`` edge from the enclosing symbol to ``target``.

        Nothing is emitted when no enclosing scope carries a SymbolID, which
        is the case for imports written at module or class-body level.
        """
        source = self._enclosing_symbol_id()
        if source is None:
            return
        self.edges.append(
            Edge(
                source=source,
                target=target,
                kind="imports",
                location=_node_range(node),
            )
        )

    # -------------------------------------------------------- definitions

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Emit the class symbol, one ``extends`` edge per base, then recurse."""
        sid = self._make_id(node.name, _Kind.CLASS)
        sym = Symbol(
            id=sid,
            kind="class",
            language=LANG,
            file=self.relative_path,
            range=_node_range(node),
            doc=ast.get_docstring(node),
            extra={"decorators": _decorator_names(node.decorator_list)}
            if node.decorator_list
            else {},
        )
        self.symbols.append(sym)
        self._record_local_name(node.name, sid)

        for base in node.bases:
            base_name = _format_attr(base)
            if base_name is None:
                # Base is not a plain dotted name (call, subscript, ...).
                continue
            target = self._resolve_name(base_name)
            self.edges.append(
                Edge(
                    source=sid,
                    target=target,
                    kind="extends",
                    location=_node_range(base),
                    confidence="high"
                    if base_name in self._imports or base_name in self._symbol_index
                    else "medium",
                )
            )

        with self._push_scope(_Scope(name=node.name, kind=_Kind.CLASS)):
            self.generic_visit(node)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Handle a ``def`` statement."""
        self._visit_func(node, is_async=False)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Handle an ``async def`` statement."""
        self._visit_func(node, is_async=True)

    def _visit_func(
        self,
        node: ast.FunctionDef | ast.AsyncFunctionDef,
        *,
        is_async: bool,
    ) -> None:
        """Emit the function/method symbol and visit the statements of its body.

        Only ``node.body`` is traversed afterwards; decorators, argument
        defaults and annotations are not visited.
        """
        # A function whose innermost enclosing scope is a class is a method.
        in_class = bool(self._scope) and self._scope[-1].kind is _Kind.CLASS
        kind: Literal["method", "function"] = "method" if in_class else "function"
        sid = self._make_id(node.name, _Kind.METHOD)
        extra: dict[str, object] = {}
        if is_async:
            extra["async"] = True
        if node.decorator_list:
            extra["decorators"] = _decorator_names(node.decorator_list)

        http_route = _extract_http_route(node.decorator_list)
        if http_route is not None:
            extra["http_route"] = http_route
        http_calls = _extract_http_calls(node.body)
        if http_calls:
            extra["http_calls"] = http_calls

        sym = Symbol(
            id=sid,
            kind=kind,
            language=LANG,
            file=self.relative_path,
            range=_node_range(node),
            signature=_function_signature(node),
            doc=ast.get_docstring(node),
            extra=extra,
        )
        self.symbols.append(sym)
        # Recorded before the function's own scope is pushed, so the decision
        # is based on the scope that *contains* the definition.
        self._record_local_name(node.name, sid)

        with self._push_scope(_Scope(name=node.name, kind=_Kind.METHOD, symbol_id=sid)):
            for child in node.body:
                self.visit(child)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Emit variable / field symbols for simple-name assignment targets."""
        # Only record module / class-level assignments. Function-locals are
        # not exposed as symbols.
        if not self._scope or self._scope[-1].kind is _Kind.CLASS:
            for target in node.targets:
                if isinstance(target, ast.Name):
                    sid = self._make_id(target.id, _Kind.TERM)
                    sym_kind: Literal["field", "variable"] = "field" if self._scope else "variable"
                    sym = Symbol(
                        id=sid,
                        kind=sym_kind,
                        language=LANG,
                        file=self.relative_path,
                        range=_node_range(node),
                    )
                    self.symbols.append(sym)
                    self._record_local_name(target.id, sid)
        # Always descend so that calls inside the assigned value are seen.
        self.generic_visit(node)

    # -------------------------------------------------------------- calls

    def visit_Call(self, node: ast.Call) -> None:
        """Emit a ``calls`` edge for a call made inside a function or method."""
        target_name = _format_attr(node.func)
        source = self._enclosing_callable_id()
        if source is not None and target_name is not None:
            target = self._resolve_name(target_name)
            self.edges.append(
                Edge(
                    source=source,
                    target=target,
                    kind="calls",
                    location=_node_range(node),
                    confidence=(
                        "high" if target_name.split(".")[0] in self._symbol_index else "medium"
                    ),
                )
            )
        # Descend so that nested calls (arguments, chained calls) are seen too.
        self.generic_visit(node)

    # ----------------------------------------------------------- helpers

    def _enclosing_symbol_id(self) -> SymbolID | None:
        """Return the SymbolID of the innermost scope that carries one."""
        for scope in reversed(self._scope):
            if scope.symbol_id is not None:
                return scope.symbol_id
        return None

    def _enclosing_callable_id(self) -> SymbolID | None:
        """Return the SymbolID of the innermost enclosing function, if any."""
        for scope in reversed(self._scope):
            if scope.kind is _Kind.METHOD:
                return scope.symbol_id
        return None

    def _make_id(self, name: str, kind: _Kind) -> SymbolID:
        """Build the SymbolID for ``name`` defined in the current scope.

        The ID is the file-path namespaces, then one type descriptor per
        enclosing class, then a leaf descriptor chosen by ``kind``.
        """
        descriptors = list(_path_namespaces(self.relative_path))
        for scope in self._scope:
            # Functions/methods do not open a SymbolID namespace; their
            # body's nested definitions become siblings at the module
            # level. (Closures live as anonymous data — we don't index
            # them in this MVP.) Only class scopes add a descriptor.
            if scope.kind is _Kind.CLASS:
                descriptors.append(Descriptor(name=scope.name, kind=DescriptorKind.TYPE))
        if kind is _Kind.CLASS:
            descriptors.append(Descriptor(name=name, kind=DescriptorKind.TYPE))
        elif kind is _Kind.METHOD:
            descriptors.append(Descriptor(name=name, kind=DescriptorKind.METHOD))
        else:
            descriptors.append(Descriptor(name=name, kind=DescriptorKind.TERM))
        return SymbolID(scheme=SCHEME, descriptors=tuple(descriptors))

    def _record_local_name(self, name: str, sid: SymbolID) -> None:
        """Remember ``name`` for same-file resolution when defined outside a function."""
        # Module-level and class-level names are addressable from the same
        # file; function-locals are not.
        if not self._scope or self._scope[-1].kind is _Kind.CLASS:
            self._symbol_index[name] = sid

    def _resolve_name(self, dotted: str) -> SymbolID:
        """Map a dotted reference to the best SymbolID known for it.

        Resolution order: a bare name defined in this file; a name bound by
        an import (yielding a module or ``module.attribute`` ID); otherwise a
        ``local``-scheme placeholder carrying the raw text.
        """
        head, _, tail = dotted.partition(".")
        # The index only holds bare names, so only undotted references match.
        if not tail and head in self._symbol_index:
            return self._symbol_index[head]
        if head in self._imports:
            module = self._imports[head]
            attrs = tail.split(".") if tail else []
            # Step into submodules this file imported explicitly, so that
            # ``import a.b`` followed by ``a.b.f()`` resolves to ``f`` in
            # module ``a.b``. The final component always stays the leaf.
            while len(attrs) > 1 and f"{module}.{attrs[0]}" in self._imported_modules:
                module = f"{module}.{attrs.pop(0)}"
            if attrs:
                return _external_symbol_id(module, ".".join(attrs))
            return _module_symbol_id(module)
        # Bare unresolved name: return a `local`-scheme placeholder so the
        # edge survives but is clearly external. Bridges may later resolve it.
        return SymbolID(
            scheme="local",
            descriptors=(Descriptor(name=dotted, kind=DescriptorKind.TERM),),
        )

    def _push_scope(self, scope: _Scope) -> _ScopeContext:
        """Return a context manager that keeps ``scope`` on the stack for a ``with`` body."""
        return _ScopeContext(self._scope, scope)
350
+
351
+
352
+ # ---------------------------------------------------------------------------
353
+ # Scope tracking
354
+ # ---------------------------------------------------------------------------
355
+
356
+
357
class _Kind(StrEnum):
    """Coarse category of a scope-stack entry or of a symbol being given an ID."""

    # A class definition.
    CLASS = "class"
    # Any ``def`` / ``async def``; used for plain functions as well as methods.
    METHOD = "method"
    # An assigned name (module-level variable or class-level field).
    TERM = "term"
361
+
362
+
363
class _Scope:
    """One entry of the visitor's scope stack: an enclosing class or function."""

    def __init__(self, *, name: str, kind: _Kind, symbol_id: SymbolID | None = None) -> None:
        # Name of the class or function that opened the scope.
        self.name = name
        # The visitor compares this by identity against ``_Kind`` members, so
        # it must be a ``_Kind`` value rather than an arbitrary string.
        self.kind = kind
        # SymbolID of the scope's own symbol, when the creator supplies one.
        self.symbol_id = symbol_id
368
+
369
+
370
class _ScopeContext:
    """Context manager that keeps one scope on a stack while a ``with`` body runs."""

    def __init__(self, stack: list[_Scope], scope: _Scope) -> None:
        self._stack, self._scope = stack, scope

    def __enter__(self) -> _Scope:
        """Push the scope and hand it to the ``with`` statement."""
        entry = self._scope
        self._stack.append(entry)
        return entry

    def __exit__(self, *exc: object) -> None:
        """Remove the top stack entry; exceptions from the body propagate."""
        del self._stack[-1]
381
+
382
+
383
+ # ---------------------------------------------------------------------------
384
+ # Pure helpers
385
+ # ---------------------------------------------------------------------------
386
+
387
+
388
def _path_namespaces(path: PurePosixPath) -> list[Descriptor]:
    """Turn each component of ``path`` into a namespace descriptor.

    ``src/foo/bar.py`` yields the chain ``src/ foo/ bar.py/``; the file name
    keeps its ``.py`` extension so the resulting symbols stay recognisable.
    """
    namespaces: list[Descriptor] = []
    for component in path.parts:
        namespaces.append(Descriptor(name=component, kind=DescriptorKind.NAMESPACE))
    return namespaces
395
+
396
+
397
def _module_symbol_id(dotted_module: str) -> SymbolID:
    """Build the synthetic SymbolID that stands for a Python module.

    Every component but the last becomes a namespace descriptor and the last
    becomes a meta descriptor. An empty name is represented as ``<root>``.
    """
    if dotted_module:
        *packages, leaf = dotted_module.split(".")
    else:
        packages, leaf = [], "<root>"
    chain = [Descriptor(name=pkg, kind=DescriptorKind.NAMESPACE) for pkg in packages]
    chain.append(Descriptor(name=leaf, kind=DescriptorKind.META))
    return SymbolID(scheme=SCHEME, descriptors=tuple(chain))
403
+
404
+
405
def _external_symbol_id(dotted_module: str, leaf: str) -> SymbolID:
    """Build the SymbolID for ``leaf`` referenced through an imported module.

    Each component of ``dotted_module`` becomes a namespace descriptor and
    ``leaf`` is appended as a term descriptor.
    """
    chain: list[Descriptor] = []
    if dotted_module:
        for segment in dotted_module.split("."):
            chain.append(Descriptor(name=segment, kind=DescriptorKind.NAMESPACE))
    chain.append(Descriptor(name=leaf, kind=DescriptorKind.TERM))
    return SymbolID(scheme=SCHEME, descriptors=tuple(chain))
411
+
412
+
413
def _node_range(node: ast.AST) -> Range:
    """Convert the position attributes of ``node`` into a ``Range``.

    Missing or falsy positions fall back to line 1 / column 0, a missing end
    line falls back to the start line, and the end line is never reported as
    earlier than the start line.
    """
    first_line = getattr(node, "lineno", None) or 1
    last_line = getattr(node, "end_lineno", None) or first_line
    if last_line < first_line:
        last_line = first_line
    return Range(
        start_line=first_line,
        start_col=getattr(node, "col_offset", None) or 0,
        end_line=last_line,
        end_col=getattr(node, "end_col_offset", None) or 0,
    )
424
+
425
+
426
def _function_signature(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
    """Render ``def name(args)`` / ``async def name(args)`` for ``node``.

    The parameter list is produced by ``ast.unparse``; if that fails, the
    parameters are shown as ``...``.
    """
    keyword = "async def" if isinstance(node, ast.AsyncFunctionDef) else "def"
    try:
        params = ast.unparse(node.args)
    except Exception:  # pragma: no cover - defensive
        params = "..."
    return f"{keyword} {node.name}({params})"
433
+
434
+
435
def _decorator_names(decorators: list[ast.expr]) -> list[str]:
    """Return the dotted name of each decorator, in source order.

    For a decorator that is a call (``@app.route("/x")``) the name of the
    callee is used. Decorators that cannot be rendered are left out.
    """
    rendered = (
        _format_attr(dec.func if isinstance(dec, ast.Call) else dec) for dec in decorators
    )
    return [name for name in rendered if name is not None]
442
+
443
+
444
def _format_attr(node: ast.AST) -> str | None:
    """Render an ``ast.Name`` / ``ast.Attribute`` chain back to dotted form.

    Args:
        node: The expression to render.

    Returns:
        The dotted name (``"a.b.c"``), or ``None`` when the expression is not
        a chain of attribute accesses rooted in a plain name.
    """
    attrs: list[str] = []
    current = node
    while isinstance(current, ast.Attribute):
        attrs.append(current.attr)
        current = current.value
    if not isinstance(current, ast.Name):
        # The chain hangs off a call, subscript, literal, ... (for example
        # ``", ".join`` or ``super().__init__``). Returning only the trailing
        # attribute would make it look like a bare name and could match an
        # unrelated same-file symbol, so report it as not renderable.
        return None
    attrs.append(current.id)
    return ".".join(reversed(attrs))
452
+
453
+
454
+ # ---------------------------------------------------------------------------
455
+ # HTTP route / call recognition (framework-agnostic, pattern-driven)
456
+ # ---------------------------------------------------------------------------
457
+
458
# Attribute names treated as HTTP verbs; attribute names are lowercased
# before being looked up here.
_HTTP_VERB_NAMES = frozenset({"get", "post", "put", "delete", "patch", "head", "options"})
# Leftmost receiver names whose verb calls are reported with "high" confidence.
_HTTP_HIGH_CONF_CLIENTS = frozenset({"requests", "httpx", "aiohttp", "urllib3"})
460
+
461
+
462
def _extract_http_route(decorators: list[ast.expr]) -> dict[str, str] | None:
    """Return ``{"method", "path"}`` for the first decorator that looks like a route.

    Two framework-neutral shapes are recognised:

    * ``@<obj>.{get,post,put,delete,patch,head,options}("/path")`` — the
      attribute name (case-insensitively) is the HTTP method.
    * ``@route("/path", ...)`` or ``@<obj>.route("/path", ...)`` /
      ``@<obj>.add_url_rule("/path", ...)`` — the method comes from a
      ``method=`` / ``methods=`` keyword and defaults to ``GET``.

    In both shapes the first positional argument must be a string literal;
    decorators with a dynamic path are skipped. ``None`` is returned when no
    decorator matches.
    """
    for dec in decorators:
        if not isinstance(dec, ast.Call):
            continue
        # Both shapes need a literal path, so reject early without one.
        literal_path = _first_str_arg(dec)
        if literal_path is None:
            continue

        callee = dec.func
        if isinstance(callee, ast.Attribute):
            verb = callee.attr.lower()
            if verb in _HTTP_VERB_NAMES:
                return {"method": verb.upper(), "path": literal_path}
            is_route_call = callee.attr in {"route", "add_url_rule"}
        else:
            is_route_call = isinstance(callee, ast.Name) and callee.id == "route"

        if is_route_call:
            return {"method": _method_from_kwargs(dec) or "GET", "path": literal_path}
    return None
501
+
502
+
503
def _first_str_arg(call: ast.Call) -> str | None:
    """Return the first positional argument of ``call`` when it is a string literal.

    ``None`` is returned when there are no positional arguments or the first
    one is anything other than a ``str`` constant.
    """
    leading = call.args[0] if call.args else None
    if isinstance(leading, ast.Constant) and isinstance(leading.value, str):
        return leading.value
    return None
511
+
512
+
513
def _method_from_kwargs(call: ast.Call) -> str | None:
    """Read the HTTP method from a ``method=...`` or ``methods=[...]`` keyword.

    A string literal is returned upper-cased. For a list or tuple only the
    first element is considered. Keywords whose value cannot be read this way
    are skipped; ``None`` is returned when none yields a method.
    """
    for keyword in call.keywords:
        if keyword.arg != "method" and keyword.arg != "methods":
            continue
        candidate = keyword.value
        if isinstance(candidate, (ast.List, ast.Tuple)):
            # Only the first listed method is reported.
            candidate = candidate.elts[0] if candidate.elts else None
        if isinstance(candidate, ast.Constant) and isinstance(candidate.value, str):
            return candidate.value.upper()
    return None
526
+
527
+
528
def _extract_http_calls(body: list[ast.stmt]) -> list[dict[str, str]]:
    """Collect ``<obj>.<verb>("url", ...)`` calls found anywhere under ``body``.

    A call qualifies when its attribute name is an HTTP verb and its first
    positional argument is a string literal that looks like a URL (leading
    ``/`` or ``http(s)://``); that guard keeps lookups such as
    ``dict.get("key")`` out of the index.

    Each hit is reported as ``{"method", "url", "confidence"}``. Confidence is
    ``high`` when the leftmost receiver name is a well-known HTTP client
    (``requests`` / ``httpx`` / ``aiohttp`` / ``urllib3``) and ``medium``
    otherwise, since the receiver then gives no proof of a network request.
    """
    found: list[dict[str, str]] = []
    call_nodes = (
        candidate
        for stmt in body
        for candidate in ast.walk(stmt)
        if isinstance(candidate, ast.Call)
    )
    for call in call_nodes:
        callee = call.func
        if not isinstance(callee, ast.Attribute):
            continue
        verb = callee.attr.lower()
        if verb not in _HTTP_VERB_NAMES:
            continue
        url = _first_str_arg(call)
        if url is None or not _looks_like_url(url):
            continue
        known_client = _attr_head(callee.value) in _HTTP_HIGH_CONF_CLIENTS
        found.append(
            {
                "method": verb.upper(),
                "url": url,
                "confidence": "high" if known_client else "medium",
            }
        )
    return found
564
+
565
+
566
def _looks_like_url(s: str) -> bool:
    """Return True for strings starting with ``/``, ``http://`` or ``https://``."""
    return s.startswith(("/", "http://", "https://"))
568
+
569
+
570
def _attr_head(node: ast.AST) -> str | None:
    """Return the name at the root of an attribute chain.

    ``a.b.c`` yields ``"a"``. ``None`` is returned when the chain is rooted in
    something other than a plain name.
    """
    root = node
    while isinstance(root, ast.Attribute):
        root = root.value
    if isinstance(root, ast.Name):
        return root.id
    return None
575
+
576
+
577
+ __all__ = ["LANG", "SCHEME", "PythonIndexer"]