commiter-cli 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commiter/__init__.py +3 -0
- commiter/adapters/__init__.py +0 -0
- commiter/adapters/base.py +96 -0
- commiter/adapters/django_rest.py +247 -0
- commiter/adapters/express.py +204 -0
- commiter/adapters/fastapi.py +170 -0
- commiter/adapters/flask.py +169 -0
- commiter/adapters/nextjs.py +180 -0
- commiter/adapters/prisma.py +76 -0
- commiter/adapters/raw_sql.py +191 -0
- commiter/adapters/react.py +129 -0
- commiter/adapters/sqlalchemy.py +99 -0
- commiter/adapters/supabase.py +68 -0
- commiter/auth.py +130 -0
- commiter/cli.py +667 -0
- commiter/correlator.py +208 -0
- commiter/extractors/__init__.py +0 -0
- commiter/extractors/api_calls.py +91 -0
- commiter/extractors/api_endpoints.py +354 -0
- commiter/extractors/backend_files.py +33 -0
- commiter/extractors/base.py +40 -0
- commiter/extractors/db_operations.py +69 -0
- commiter/extractors/dependencies.py +219 -0
- commiter/generic_resolver.py +204 -0
- commiter/handler_index.py +97 -0
- commiter/lib.py +63 -0
- commiter/middleware_index.py +350 -0
- commiter/models.py +117 -0
- commiter/parser.py +1283 -0
- commiter/prefix_index.py +211 -0
- commiter/report/__init__.py +0 -0
- commiter/report/ai.py +120 -0
- commiter/report/api_guide.py +217 -0
- commiter/report/architecture.py +930 -0
- commiter/report/console.py +254 -0
- commiter/report/json_output.py +122 -0
- commiter/report/markdown.py +163 -0
- commiter/scanner.py +383 -0
- commiter/type_index.py +304 -0
- commiter/uploader.py +46 -0
- commiter/utils/__init__.py +0 -0
- commiter/utils/env_reader.py +78 -0
- commiter/utils/file_classifier.py +187 -0
- commiter/utils/path_helpers.py +73 -0
- commiter/utils/tsconfig_resolver.py +281 -0
- commiter/wrapper_index.py +288 -0
- commiter_cli-0.3.0.dist-info/METADATA +14 -0
- commiter_cli-0.3.0.dist-info/RECORD +96 -0
- commiter_cli-0.3.0.dist-info/WHEEL +5 -0
- commiter_cli-0.3.0.dist-info/entry_points.txt +2 -0
- commiter_cli-0.3.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/fixtures/arch_backend/app.py +22 -0
- tests/fixtures/arch_backend/middleware/__init__.py +0 -0
- tests/fixtures/arch_backend/middleware/rate_limit.py +4 -0
- tests/fixtures/arch_backend/routes/__init__.py +0 -0
- tests/fixtures/arch_backend/routes/analytics.py +20 -0
- tests/fixtures/arch_backend/routes/auth.py +29 -0
- tests/fixtures/arch_backend/routes/projects.py +60 -0
- tests/fixtures/arch_backend/routes/users.py +55 -0
- tests/fixtures/arch_monorepo/apps/api/app.py +30 -0
- tests/fixtures/arch_monorepo/apps/api/middleware/__init__.py +0 -0
- tests/fixtures/arch_monorepo/apps/api/middleware/auth.py +17 -0
- tests/fixtures/arch_monorepo/apps/api/middleware/rate_limit.py +10 -0
- tests/fixtures/arch_monorepo/apps/api/routes/__init__.py +0 -0
- tests/fixtures/arch_monorepo/apps/api/routes/auth.py +46 -0
- tests/fixtures/arch_monorepo/apps/api/routes/invites.py +30 -0
- tests/fixtures/arch_monorepo/apps/api/routes/notifications.py +25 -0
- tests/fixtures/arch_monorepo/apps/api/routes/projects.py +80 -0
- tests/fixtures/arch_monorepo/apps/api/routes/tasks.py +91 -0
- tests/fixtures/arch_monorepo/apps/api/routes/users.py +48 -0
- tests/fixtures/arch_monorepo/apps/api/services/__init__.py +0 -0
- tests/fixtures/arch_monorepo/apps/api/services/email.py +11 -0
- tests/fixtures/backend_b/app.py +17 -0
- tests/fixtures/fastapi_app/app.py +48 -0
- tests/fixtures/fastapi_crossfile/routes.py +18 -0
- tests/fixtures/fastapi_crossfile/schemas.py +21 -0
- tests/fixtures/flask_app/app.py +33 -0
- tests/fixtures/flask_blueprint/app.py +7 -0
- tests/fixtures/flask_blueprint/routes/items.py +13 -0
- tests/fixtures/flask_blueprint/routes/users.py +20 -0
- tests/fixtures/middleware_test_flask/routes/public.py +8 -0
- tests/fixtures/middleware_test_flask/routes/users.py +26 -0
- tests/fixtures/python_deep_imports/app/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/api/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/api/health.py +11 -0
- tests/fixtures/python_deep_imports/app/api/v1/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/api/v1/items.py +18 -0
- tests/fixtures/python_deep_imports/app/api/v1/users.py +27 -0
- tests/fixtures/python_deep_imports/app/schemas/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/schemas/item.py +13 -0
- tests/fixtures/python_deep_imports/app/schemas/user.py +15 -0
- tests/fixtures/python_deep_imports/app/shared/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/shared/models.py +7 -0
- tests/fixtures/raw_sql_test/app.py +54 -0
- tests/test_architecture.py +757 -0
commiter/correlator.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""Cross-repo correlator: matches frontend API calls to backend endpoints."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from commiter.models import (
|
|
8
|
+
APICall,
|
|
9
|
+
APIEndpoint,
|
|
10
|
+
RepoDocumentation,
|
|
11
|
+
ServiceRelationship,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def correlate(docs: list[RepoDocumentation]) -> list[ServiceRelationship]:
    """Build every relationship the correlator can detect across ``docs``.

    Three kinds of links are produced, in this order: API calls paired with
    their best-matching endpoint, packages shared by several repos, and
    database tables shared by several repos.

    Args:
        docs: Scan results, one entry per repository.

    Returns:
        The combined list of ServiceRelationship objects.
    """
    # Candidates are pooled from every document, the caller's own repo included.
    endpoint_pool: list[APIEndpoint] = [ep for doc in docs for ep in doc.endpoints]
    call_pool: list[APICall] = [call for doc in docs for call in doc.api_calls]

    links = []
    for api_call in call_pool:
        match = _find_best_endpoint_match(api_call, endpoint_pool)
        if not match:
            continue
        target, score = match
        links.append(
            ServiceRelationship(
                source_repo=api_call.repo,
                target_repo=target.repo,
                connection_type="api_call",
                source_file=f"{api_call.file_path}:{api_call.line}",
                target_endpoint=f"{target.http_method} {target.route_pattern}",
                confidence=score,
            )
        )

    # Package-level and table-level overlaps are appended after the API links.
    links += _find_shared_dependencies(docs)
    links += _find_shared_db_tables(docs)
    return links
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _find_best_endpoint_match(
    call: APICall, endpoints: list[APIEndpoint]
) -> tuple[APIEndpoint, float] | None:
    """Pick the endpoint whose route is most similar to ``call``'s URL.

    An endpoint is only considered when its HTTP method equals the call's
    method (case-insensitively) or when either side is ``ALL``.

    Returns:
        ``(endpoint, confidence)`` for the highest-scoring candidate, or
        ``None`` when nothing scores above zero. On a tie the endpoint that
        appears first in ``endpoints`` is kept.
    """
    wanted_path = _normalize_url(call.url_pattern)
    wanted_method = call.http_method.upper()

    winner: tuple[APIEndpoint, float] | None = None
    for candidate in endpoints:
        candidate_path = _normalize_route(candidate.route_pattern)
        candidate_method = candidate.http_method.upper()

        compatible = (
            wanted_method == candidate_method
            or wanted_method == "ALL"
            or candidate_method == "ALL"
        )
        if not compatible:
            continue

        score = _path_similarity(wanted_path, candidate_path)
        if score <= 0:
            continue
        # Strict comparison: an equal score never displaces an earlier winner.
        if winner is None or score > winner[1]:
            winner = (candidate, score)

    return winner
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _normalize_url(url: str) -> str:
|
|
76
|
+
"""Normalize a frontend URL for comparison.
|
|
77
|
+
|
|
78
|
+
Strips protocol, host, base path variables, and normalizes parameters.
|
|
79
|
+
"""
|
|
80
|
+
# Remove protocol + host
|
|
81
|
+
url = re.sub(r"^https?://[^/]+", "", url)
|
|
82
|
+
# Remove template literal expressions like ${API_URL}
|
|
83
|
+
url = re.sub(r"\$\{[^}]+\}", "", url)
|
|
84
|
+
# Remove env variable references
|
|
85
|
+
url = re.sub(r"process\.env\.\w+", "", url)
|
|
86
|
+
# Strip leading/trailing slashes and normalize
|
|
87
|
+
url = url.strip("/")
|
|
88
|
+
# Replace template literal params ${id} with :param placeholder
|
|
89
|
+
url = re.sub(r"\$\{(\w+)\}", r":\1", url)
|
|
90
|
+
return url
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _normalize_route(route: str) -> str:
|
|
94
|
+
"""Normalize a backend route pattern for comparison.
|
|
95
|
+
|
|
96
|
+
Converts all param syntaxes to a common format.
|
|
97
|
+
"""
|
|
98
|
+
route = route.strip("/")
|
|
99
|
+
# Flask: <param> or <int:param>
|
|
100
|
+
route = re.sub(r"<(?:\w+:)?(\w+)>", r":\1", route)
|
|
101
|
+
# FastAPI: {param}
|
|
102
|
+
route = re.sub(r"\{(\w+)\}", r":\1", route)
|
|
103
|
+
# Next.js: [param]
|
|
104
|
+
route = re.sub(r"\[(\w+)\]", r":\1", route)
|
|
105
|
+
return route
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _path_similarity(call_path: str, endpoint_path: str) -> float:
    """Score how well a normalized call path matches a normalized route.

    Returns:
        ``1.0`` when both paths have the same number of segments and all of
        them match; ``0.7`` when the shorter path matches some contiguous run
        of segments inside the longer one; ``0.0`` otherwise (including when
        either path has no segments).
    """
    call_segments = [seg for seg in call_path.split("/") if seg]
    route_segments = [seg for seg in endpoint_path.split("/") if seg]

    if not (call_segments and route_segments):
        return 0.0

    if len(call_segments) == len(route_segments):
        return 1.0 if _segments_match(call_segments, route_segments) else 0.0

    # Lengths differ: slide the shorter path over the longer one (the frontend
    # might include an /api prefix that the backend route does not).
    if len(call_segments) < len(route_segments):
        needle, haystack = call_segments, route_segments
    else:
        needle, haystack = route_segments, call_segments

    width = len(needle)
    last_start = len(haystack) - width
    windows = (haystack[start:start + width] for start in range(last_start + 1))
    if any(_segments_match(needle, window) for window in windows):
        return 0.7  # partial match
    return 0.0
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _segments_match(a: list[str], b: list[str]) -> bool:
|
|
135
|
+
"""Check if two path segment lists match, treating :params as wildcards."""
|
|
136
|
+
if len(a) != len(b):
|
|
137
|
+
return False
|
|
138
|
+
for sa, sb in zip(a, b):
|
|
139
|
+
if sa.startswith(":") or sb.startswith(":"):
|
|
140
|
+
continue # parameter — always matches
|
|
141
|
+
if sa != sb:
|
|
142
|
+
return False
|
|
143
|
+
return True
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _find_shared_dependencies(docs: list[RepoDocumentation]) -> list[ServiceRelationship]:
|
|
147
|
+
"""Find packages used by multiple repos."""
|
|
148
|
+
relationships = []
|
|
149
|
+
if len(docs) < 2:
|
|
150
|
+
return relationships
|
|
151
|
+
|
|
152
|
+
# Build dep name -> list of repos
|
|
153
|
+
dep_repos: dict[str, list[str]] = {}
|
|
154
|
+
for doc in docs:
|
|
155
|
+
for dep in doc.dependencies:
|
|
156
|
+
if dep.dev_only:
|
|
157
|
+
continue
|
|
158
|
+
dep_repos.setdefault(dep.name, []).append(doc.repo_name)
|
|
159
|
+
|
|
160
|
+
for dep_name, repos in dep_repos.items():
|
|
161
|
+
if len(repos) > 1:
|
|
162
|
+
for i in range(len(repos)):
|
|
163
|
+
for j in range(i + 1, len(repos)):
|
|
164
|
+
relationships.append(ServiceRelationship(
|
|
165
|
+
source_repo=repos[i],
|
|
166
|
+
target_repo=repos[j],
|
|
167
|
+
connection_type="shared_package",
|
|
168
|
+
source_file=dep_name,
|
|
169
|
+
target_endpoint=dep_name,
|
|
170
|
+
confidence=0.5,
|
|
171
|
+
))
|
|
172
|
+
|
|
173
|
+
return relationships
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _find_shared_db_tables(docs: list[RepoDocumentation]) -> list[ServiceRelationship]:
|
|
177
|
+
"""Find database tables accessed by multiple repos (shared data layer)."""
|
|
178
|
+
relationships = []
|
|
179
|
+
if len(docs) < 2:
|
|
180
|
+
return relationships
|
|
181
|
+
|
|
182
|
+
# Build table_name -> list of (repo_name, first_file_reference)
|
|
183
|
+
table_repos: dict[str, list[tuple[str, str]]] = {}
|
|
184
|
+
for doc in docs:
|
|
185
|
+
# Collect unique tables per repo (normalized to lowercase)
|
|
186
|
+
repo_tables: dict[str, str] = {}
|
|
187
|
+
for op in doc.db_operations:
|
|
188
|
+
table_lower = op.table_name.lower()
|
|
189
|
+
if table_lower not in repo_tables:
|
|
190
|
+
repo_tables[table_lower] = f"{op.file_path}:{op.line}"
|
|
191
|
+
|
|
192
|
+
for table, first_ref in repo_tables.items():
|
|
193
|
+
table_repos.setdefault(table, []).append((doc.repo_name, first_ref))
|
|
194
|
+
|
|
195
|
+
for table_name, repos in table_repos.items():
|
|
196
|
+
if len(repos) > 1:
|
|
197
|
+
for i in range(len(repos)):
|
|
198
|
+
for j in range(i + 1, len(repos)):
|
|
199
|
+
relationships.append(ServiceRelationship(
|
|
200
|
+
source_repo=repos[i][0],
|
|
201
|
+
target_repo=repos[j][0],
|
|
202
|
+
connection_type="shared_db",
|
|
203
|
+
source_file=repos[i][1],
|
|
204
|
+
target_endpoint=f"table:{table_name}",
|
|
205
|
+
confidence=0.8,
|
|
206
|
+
))
|
|
207
|
+
|
|
208
|
+
return relationships
|
|
File without changes
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Extract frontend API calls (fetch, axios, etc.) from component files."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from commiter.adapters.react import ReactAdapter
|
|
9
|
+
from commiter.extractors.base import BaseExtractor
|
|
10
|
+
from commiter.models import APICall
|
|
11
|
+
from commiter.parser import get_source, build_constants_from_file
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from tree_sitter import Tree
|
|
15
|
+
|
|
16
|
+
# Module-level ReactAdapter instance, created once at import time;
# APICallExtractor.extract() calls its find_api_calls() for each file.
_FRONTEND_ADAPTER = ReactAdapter()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class APICallExtractor(BaseExtractor):
    """Extract outgoing HTTP calls from JavaScript/TypeScript source files."""

    name = "api_calls"
    # Class-level default. set_env_vars() rebinds the attribute on the
    # instance, so this shared dict is never mutated by this class.
    _env_vars: dict[str, str] = {}

    def set_env_vars(self, env_vars: dict[str, str]) -> None:
        """Set environment variables from .env files (called by scanner)."""
        self._env_vars = env_vars

    def can_handle(self, file_path: str, language: str | None) -> bool:
        """Return True for JS/TS files that look like frontend code.

        API route files are rejected: anything under a ``pages/api/``
        directory and any file named ``route.<ext>``. Other files are accepted
        when their path contains one of the frontend directory hints, or when
        the language is ``tsx``.
        """
        if language not in ("javascript", "typescript", "tsx"):
            return False

        # Focus on frontend-like files (components, pages, hooks, lib)
        lower = file_path.lower()
        frontend_hints = (
            "components/", "pages/", "src/", "app/", "hooks/",
            "lib/", "utils/", "services/", "features/",
        )

        # Exclude API route files (backend).
        # - Anchor with a leading slash so a relative "pages/api/x.ts" is
        #   caught as well as ".../pages/api/x.ts".
        # - Path.name never contains a separator, so the file name has to be
        #   tested with startswith("route."), not with a "/route." substring.
        anchored = "/" + lower.lstrip("/")
        file_name = Path(file_path).name.lower()
        if "/pages/api/" in anchored or file_name.startswith("route."):
            return False

        return language == "tsx" or any(hint in lower for hint in frontend_hints)

    def extract(self, file_path: str, repo_name: str, tree: Tree | None = None, language: str | None = None) -> list[APICall]:
        """Return one APICall per call the frontend adapter finds in the file.

        Args:
            file_path: Path of the file being scanned.
            repo_name: Stored on every returned APICall as ``repo``.
            tree: Parsed syntax tree; without it nothing is extracted.
            language: Unused here.

        Returns:
            The extracted calls, or an empty list when ``tree`` is None.
        """
        if tree is None:
            return []

        source = get_source(file_path)

        # Build per-file constants for URL resolution
        constants = build_constants_from_file(tree.root_node, source, self._env_vars)
        matches = _FRONTEND_ADAPTER.find_api_calls(tree, source, file_path=file_path, constants=constants)

        # Derive component/page name from file path (same for every call)
        component_name = self._infer_component_name(file_path)

        return [
            APICall(
                repo=repo_name,
                file_path=file_path,
                line=match.line,
                component_or_page=component_name,
                http_method=match.http_method,
                url_pattern=match.url_pattern,
                client_library=self._detect_library(match),
                response_type=match.response_type,
                body_type=match.body_type,
            )
            for match in matches
        ]

    def _infer_component_name(self, file_path: str) -> str:
        """Derive a component/page name from the file path.

        ``index.*`` and ``page.*`` files are named after their parent
        directory; every other file is named after its stem.
        """
        path = Path(file_path)
        if path.stem in ("index", "page"):
            return path.parent.name
        return path.stem

    def _detect_library(self, match) -> str:
        """Return ``"axios"`` if the call node's text mentions axios, else ``"fetch"``.

        The check is a byte-substring heuristic on ``match.call_node.text``.
        A match without a usable ``call_node`` falls back to ``"fetch"``.
        """
        if hasattr(match, "call_node"):
            try:
                raw_text = match.call_node.text
                if raw_text and b"axios" in raw_text:
                    return "axios"
            except (AttributeError, TypeError):
                # Best effort: call_node is None, or its text is not bytes.
                pass
        return "fetch"
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""Extract API endpoint definitions from backend files."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from commiter.adapters.base import BaseAdapter
|
|
9
|
+
from commiter.adapters.flask import FlaskAdapter
|
|
10
|
+
from commiter.adapters.fastapi import FastAPIAdapter
|
|
11
|
+
from commiter.adapters.express import ExpressAdapter
|
|
12
|
+
from commiter.adapters.nextjs import NextJSAdapter
|
|
13
|
+
from commiter.adapters.django_rest import DjangoRESTAdapter
|
|
14
|
+
from commiter.extractors.base import BaseExtractor
|
|
15
|
+
from commiter.models import APIEndpoint, Param, ParamSource
|
|
16
|
+
from commiter.parser import (
|
|
17
|
+
get_source, find_ts_interface_fields, find_ts_enum_declarations,
|
|
18
|
+
find_ts_type_aliases, find_ts_const_objects, find_py_class_fields,
|
|
19
|
+
parse_generic_type, TypeField,
|
|
20
|
+
)
|
|
21
|
+
from commiter.generic_resolver import resolve_generic_type
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from tree_sitter import Tree
|
|
25
|
+
|
|
26
|
+
# Map framework names to adapter instances. Each adapter class is
# instantiated once, in the order listed.
_ADAPTERS: dict[str, BaseAdapter] = {
    registry_key: adapter_cls()
    for registry_key, adapter_cls in (
        ("flask", FlaskAdapter),
        ("fastapi", FastAPIAdapter),
        ("express", ExpressAdapter),
        ("nextjs", NextJSAdapter),
        ("django-rest", DjangoRESTAdapter),
    )
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _register_adapter(name: str, adapter: BaseAdapter) -> None:
    """Store ``adapter`` in the module registry under ``name``.

    An adapter already registered under the same name is replaced.
    """
    _ADAPTERS.update({name: adapter})
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_adapters() -> dict[str, BaseAdapter]:
    """Return a shallow copy of the adapter registry.

    Adding or removing keys on the returned dict does not affect the registry;
    the adapter objects themselves are shared.
    """
    snapshot = _ADAPTERS.copy()
    return snapshot
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class APIEndpointExtractor(BaseExtractor):
    """Extract API endpoint definitions by running the registered adapters.

    The four ``set_*_index`` methods install optional cross-file indexes;
    each one stays ``None`` until set, and ``extract`` skips the corresponding
    step while it is unset.
    """

    name = "api_endpoints"

    def __init__(self) -> None:
        self._type_index = None
        self._prefix_index = None
        self._middleware_index = None
        self._handler_index = None

    def set_type_index(self, type_index) -> None:
        """Set the cross-file type index (called by scanner after Pass 1)."""
        self._type_index = type_index

    def set_prefix_index(self, prefix_index) -> None:
        """Set the cross-file prefix index (called by scanner after Pass 1)."""
        self._prefix_index = prefix_index

    def set_middleware_index(self, middleware_index) -> None:
        """Set the cross-file middleware index (called by scanner after Pass 1)."""
        self._middleware_index = middleware_index

    def set_handler_index(self, handler_index) -> None:
        """Set the cross-file handler type index (called by scanner after Pass 1)."""
        self._handler_index = handler_index

    def can_handle(self, file_path: str, language: str | None) -> bool:
        """Return True when ``language`` is python, javascript, typescript or tsx."""
        if language is None:
            return False
        # Only handle languages we have adapters for
        return language in ("python", "javascript", "typescript", "tsx")

    def extract(self, file_path: str, repo_name: str, tree: Tree | None = None, language: str | None = None) -> list[APIEndpoint]:
        """Return one APIEndpoint per route found by the applicable adapters.

        Args:
            file_path: Path of the file being scanned.
            repo_name: Stored on every returned APIEndpoint as ``repo``.
            tree: Parsed syntax tree of the file.
            language: Language of the file.

        Returns:
            The endpoints found; empty when ``tree`` or ``language`` is None.
        """
        if tree is None or language is None:
            return []

        source = get_source(file_path)
        endpoints = []

        # Build type definitions for resolution (same-file + cross-file fallback)
        type_defs = self._build_type_definitions(tree, source, language, file_path)

        # Detect which framework the file actually uses (from imports/requires)
        # None = undetermined (every language-matching adapter runs);
        # empty set = no adapter runs.
        file_frameworks = self._detect_file_framework(source, language)

        # Languages that JS adapters should also handle
        js_family = {"javascript", "typescript", "tsx"}

        # Try each adapter that matches the language
        for adapter in _ADAPTERS.values():
            # Match adapter language to file language
            adapter_matches = (
                adapter.language == language
                or (adapter.language in js_family and language in js_family)
            )
            if not adapter_matches:
                continue

            # Skip adapters that don't match the file's actual framework
            if file_frameworks is not None and adapter.framework_name not in file_frameworks:
                continue

            # NextJS adapter needs file_path for convention-based routing
            if isinstance(adapter, NextJSAdapter):
                routes = adapter.find_route_definitions(tree, source, file_path=file_path)
            else:
                routes = adapter.find_route_definitions(tree, source)
            for route in routes:
                # Resolve prefix from blueprint/router mounting
                # (the route object is updated in place, so every later step
                # sees the prefixed pattern)
                if self._prefix_index and route.router_var:
                    prefix = self._prefix_index.get_prefix_for_file(file_path, route.router_var)
                    if prefix:
                        route.route_pattern = _join_prefix(prefix, route.route_pattern)

                # Extract details, passing type definitions for resolution
                path_param_names = adapter.extract_path_params(route.route_pattern)
                auth = adapter.extract_auth_info(route, source)

                # Pass type_defs to adapters that support it
                # NOTE(review): co_varnames holds local variable names as well
                # as parameters, so this is a name check, not a signature check.
                if hasattr(adapter.extract_request_body_fields, '__code__') and \
                        'type_definitions' in adapter.extract_request_body_fields.__code__.co_varnames:
                    body_fields = adapter.extract_request_body_fields(route, source, type_definitions=type_defs)
                else:
                    body_fields = adapter.extract_request_body_fields(route, source)

                response_fields = adapter.extract_response_fields(route, source)

                params = [
                    Param(name=p, source=ParamSource.PATH, type_hint=route.param_types.get(p))
                    for p in path_param_names
                ]

                # Pick up type names from route.param_types (set by adapters)
                request_body_type = route.param_types.get("_request_body_type")
                response_type = route.param_types.get("_response_type")

                # For class-based handlers: look up body type from handler index
                if not request_body_type and self._handler_index and "." in route.handler_name:
                    request_body_type = self._handler_index.get_body_type(route.handler_name)
                    # If we found a body type, also resolve its fields
                    # NOTE(review): nesting of this block under the handler-index
                    # lookup was reconstructed from an indentation-stripped
                    # listing — confirm against the original file.
                    if request_body_type and request_body_type in type_defs:
                        body_fields = []
                        for tf in type_defs[request_body_type]:
                            opt = "?" if tf.optional else ""
                            body_fields.append(f"{tf.name}: {tf.type_str}{opt}")

                # Query middleware index for middleware covering this route
                mw = []
                if self._middleware_index:
                    mw = self._middleware_index.get_middleware_for_route(
                        file_path, route.route_pattern, route.router_var, route.line,
                    )

                endpoints.append(APIEndpoint(
                    repo=repo_name,
                    file_path=file_path,
                    line=route.line,
                    http_method=route.http_method,
                    route_pattern=route.route_pattern,
                    handler_name=route.handler_name,
                    framework=adapter.framework_name,
                    parameters=params,
                    request_body_fields=body_fields,
                    response_fields=response_fields,
                    auth_decorators=auth,
                    request_body_type=request_body_type,
                    response_type=response_type,
                    middleware=mw,
                ))

        return endpoints

    @staticmethod
    def _detect_file_framework(source: bytes, language: str) -> set[str] | None:
        """Detect which framework a file uses based on its imports.

        All checks are plain substring tests on the decoded source text.

        Returns a set of framework names, or None if we can't determine (allow all).
        For JS/TS files an empty set is returned when nothing backend-like is
        found, which makes ``extract`` skip every adapter.
        """
        text = source.decode("utf-8", errors="replace")

        if language == "python":
            frameworks = set()
            if "from flask" in text or "import flask" in text:
                frameworks.add("flask")
            if "from fastapi" in text or "import fastapi" in text:
                frameworks.add("fastapi")
            if "from django" in text or "from rest_framework" in text:
                frameworks.add("django-rest")
            return frameworks if frameworks else None

        if language in ("javascript", "typescript", "tsx"):
            frameworks = set()
            if "express" in text and ("require(" in text or "from " in text):
                frameworks.add("express")
            # NOTE(review): "next" is matched anywhere in the text, so any
            # identifier containing "next" also triggers this branch.
            if "next" in text or "/pages/api/" in text or "/app/" in text:
                frameworks.add("nextjs")
            # If we found any backend indicators, return them; if none found,
            # return empty set to prevent backend adapters from matching frontend files
            if frameworks:
                return frameworks
            # Check if this looks like a backend file at all (has route-like patterns)
            has_route_patterns = ("router." in text or "app." in text) and ("require" in text or "express" in text)
            if not has_route_patterns:
                return set()  # empty = no backend adapters should run
            return None

        return None

    def _build_type_definitions(self, tree, source: bytes, language: str, file_path: str) -> dict[str, list[TypeField]]:
        """Build a dict of type definitions for resolution.

        Combines same-file definitions with cross-file lookups from the TypeIndex.
        For TypeScript/TSX the later sources (enums, type aliases, const
        objects) overwrite earlier entries with the same name. For Python only
        classes whose base list mentions BaseModel, Schema or TypedDict are
        collected. Other languages yield an empty mapping.
        """
        type_defs: dict[str, list[TypeField]] = {}

        if language in ("typescript", "tsx"):
            type_defs = find_ts_interface_fields(tree.root_node, source)
            type_defs.update(find_ts_enum_declarations(tree.root_node, source))
            type_defs.update(find_ts_type_aliases(tree.root_node, source))
            type_defs.update(find_ts_const_objects(tree.root_node, source))
        elif language == "python":
            import re
            text = source.decode("utf-8", errors="replace")
            # Regex only finds candidate class names; the fields themselves
            # come from the syntax tree.
            for match in re.finditer(r'class\s+(\w+)\s*\([^)]*(?:BaseModel|Schema|TypedDict)', text):
                class_name = match.group(1)
                fields = find_py_class_fields(tree.root_node, source, class_name)
                if fields:
                    type_defs[class_name] = fields

        # Wrap with cross-file fallback if TypeIndex is available
        if self._type_index is not None:
            return _TypeDefsWithFallback(type_defs, self._type_index, file_path)

        return type_defs
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class _TypeDefsWithFallback(dict):
    """Dict-like that falls back to TypeIndex for missing keys.

    Only ``__bool__``, ``__contains__`` and ``__getitem__`` are overridden.
    NOTE(review): inherited dict methods such as ``get()``, ``keys()`` and
    iteration are not overridden here, so they only see the local definitions.
    """

    def __init__(self, local_defs: dict, type_index, file_path: str):
        super().__init__(local_defs)
        self._type_index = type_index
        self._file_path = file_path

    def __bool__(self):
        # Always truthy so `if type_definitions and ...` doesn't short-circuit
        return True

    def __contains__(self, key):
        """Return True if ``key`` is local, in the index, or a resolvable generic."""
        if super().__contains__(key):
            return True
        # Check cross-file index
        typedef = self._type_index.resolve(key, None, self._file_path)
        if typedef is not None:
            return True
        # Check if it's a generic type we can resolve
        if "<" in key or key.endswith("[]"):
            resolved = self._try_generic_resolve(key)
            if resolved is not None:
                return True
        return False

    def __getitem__(self, key):
        """Return the fields for ``key`` with type-reference markers expanded.

        Lookup order: local definitions, the cross-file index, then generic
        resolution for keys containing ``<`` or ending in ``[]``.

        Raises:
            KeyError: when none of the three lookups produces a result.
        """
        try:
            fields = super().__getitem__(key)
        except KeyError:
            typedef = self._type_index.resolve(key, None, self._file_path)
            if typedef is not None:
                fields = typedef.fields
            else:
                # Try generic resolution: Pick<User, "id">, Partial<User>, etc.
                if "<" in key or key.endswith("[]"):
                    resolved = self._try_generic_resolve(key)
                    if resolved is not None:
                        return self._resolve_type_refs(resolved)
                # Re-raise the KeyError from the local lookup
                raise
        # Resolve any __type_ref__ markers (from intersections/unions)
        return self._resolve_type_refs(fields)

    def _try_generic_resolve(self, type_str: str) -> list | None:
        """Try to resolve a generic type like Pick<User, "id"> to fields."""
        def type_lookup(name):
            try:
                return list(self[name])  # recursive — uses our own __getitem__
            except KeyError:
                return None

        def generic_def_lookup(name):
            """Look up a generic type definition and return (fields, generic_params)."""
            # Try local
            try:
                # Explicit two-argument super(): the zero-argument form does
                # not work here because this nested function has no `self`
                # parameter of its own.
                fields = list(super(_TypeDefsWithFallback, self).__getitem__(name))
                # Check type_index for generic_params
                typedef = self._type_index.resolve(name, None, self._file_path)
                params = typedef.generic_params if typedef else []
                return (fields, params)
            except KeyError:
                pass
            # Try cross-file
            typedef = self._type_index.resolve(name, None, self._file_path)
            if typedef:
                return (list(typedef.fields), typedef.generic_params)
            return None

        return resolve_generic_type(type_str, type_lookup, generic_def_lookup)

    def _resolve_type_refs(self, fields: list, depth: int = 0) -> list:
        """Recursively resolve __type_ref__ and __generic__ markers to actual fields.

        A marker that cannot be resolved is kept in the output unchanged.
        """
        if depth > 5:
            return fields  # guard against circular refs

        resolved = []
        for field in fields:
            if field.type_str == "__type_ref__":
                # For this marker the field's name is the referenced type name.
                ref_fields = self._lookup_type(field.name)
                if ref_fields:
                    resolved.extend(self._resolve_type_refs(ref_fields, depth + 1))
                else:
                    resolved.append(field)
            elif field.type_str == "__generic__" and field.value:
                # Generic type alias: Pick<User, "id">, Partial<User>, ApiResponse<User>
                generic_fields = self._try_generic_resolve(field.value)
                if generic_fields:
                    resolved.extend(self._resolve_type_refs(generic_fields, depth + 1))
                else:
                    resolved.append(field)
            else:
                resolved.append(field)
        return resolved

    def _lookup_type(self, type_name: str) -> list | None:
        """Look up a type by name in local defs or cross-file index.

        Unlike ``__getitem__`` this performs no generic resolution and no
        marker expansion; it returns None instead of raising.
        """
        # Try local first
        try:
            return list(super().__getitem__(type_name))
        except KeyError:
            pass
        # Try cross-file
        typedef = self._type_index.resolve(type_name, None, self._file_path)
        if typedef is not None:
            return list(typedef.fields)
        return None
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _join_prefix(prefix: str, route: str) -> str:
|
|
350
|
+
"""Join a URL prefix with a route pattern, avoiding double slashes."""
|
|
351
|
+
prefix = prefix.rstrip("/")
|
|
352
|
+
if not route.startswith("/"):
|
|
353
|
+
route = "/" + route
|
|
354
|
+
return prefix + route
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Classify files as backend, frontend, config, test, etc."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
from commiter.extractors.base import BaseExtractor
|
|
8
|
+
from commiter.models import FileClassification
|
|
9
|
+
from commiter.utils.file_classifier import classify_file
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from tree_sitter import Tree
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class BackendFileExtractor(BaseExtractor):
    """Placeholder extractor for role-based file classification.

    File classification is already done by the scanner. This extractor accepts
    every file but currently contributes no results; it is the place where
    AST-based hints (for example, a ``.py`` file that imports Flask being a
    backend file) could be added later.
    """

    name = "backend_files"

    def can_handle(self, file_path: str, language: str | None) -> bool:
        """Accept every file: classification applies to all of them."""
        return True

    def extract(self, file_path: str, repo_name: str, tree: Tree | None = None, language: str | None = None) -> list[FileClassification]:
        """Return no classifications.

        The scanner already classifies each file, so returning anything here
        would duplicate its results.
        """
        nothing_to_add: list[FileClassification] = []
        return nothing_to_add
|