commiter-cli 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. commiter/__init__.py +3 -0
  2. commiter/adapters/__init__.py +0 -0
  3. commiter/adapters/base.py +96 -0
  4. commiter/adapters/django_rest.py +247 -0
  5. commiter/adapters/express.py +204 -0
  6. commiter/adapters/fastapi.py +170 -0
  7. commiter/adapters/flask.py +169 -0
  8. commiter/adapters/nextjs.py +180 -0
  9. commiter/adapters/prisma.py +76 -0
  10. commiter/adapters/raw_sql.py +191 -0
  11. commiter/adapters/react.py +129 -0
  12. commiter/adapters/sqlalchemy.py +99 -0
  13. commiter/adapters/supabase.py +68 -0
  14. commiter/auth.py +130 -0
  15. commiter/cli.py +667 -0
  16. commiter/correlator.py +208 -0
  17. commiter/extractors/__init__.py +0 -0
  18. commiter/extractors/api_calls.py +91 -0
  19. commiter/extractors/api_endpoints.py +354 -0
  20. commiter/extractors/backend_files.py +33 -0
  21. commiter/extractors/base.py +40 -0
  22. commiter/extractors/db_operations.py +69 -0
  23. commiter/extractors/dependencies.py +219 -0
  24. commiter/generic_resolver.py +204 -0
  25. commiter/handler_index.py +97 -0
  26. commiter/lib.py +63 -0
  27. commiter/middleware_index.py +350 -0
  28. commiter/models.py +117 -0
  29. commiter/parser.py +1283 -0
  30. commiter/prefix_index.py +211 -0
  31. commiter/report/__init__.py +0 -0
  32. commiter/report/ai.py +120 -0
  33. commiter/report/api_guide.py +217 -0
  34. commiter/report/architecture.py +930 -0
  35. commiter/report/console.py +254 -0
  36. commiter/report/json_output.py +122 -0
  37. commiter/report/markdown.py +163 -0
  38. commiter/scanner.py +383 -0
  39. commiter/type_index.py +304 -0
  40. commiter/uploader.py +46 -0
  41. commiter/utils/__init__.py +0 -0
  42. commiter/utils/env_reader.py +78 -0
  43. commiter/utils/file_classifier.py +187 -0
  44. commiter/utils/path_helpers.py +73 -0
  45. commiter/utils/tsconfig_resolver.py +281 -0
  46. commiter/wrapper_index.py +288 -0
  47. commiter_cli-0.3.0.dist-info/METADATA +14 -0
  48. commiter_cli-0.3.0.dist-info/RECORD +96 -0
  49. commiter_cli-0.3.0.dist-info/WHEEL +5 -0
  50. commiter_cli-0.3.0.dist-info/entry_points.txt +2 -0
  51. commiter_cli-0.3.0.dist-info/top_level.txt +2 -0
  52. tests/__init__.py +0 -0
  53. tests/fixtures/arch_backend/app.py +22 -0
  54. tests/fixtures/arch_backend/middleware/__init__.py +0 -0
  55. tests/fixtures/arch_backend/middleware/rate_limit.py +4 -0
  56. tests/fixtures/arch_backend/routes/__init__.py +0 -0
  57. tests/fixtures/arch_backend/routes/analytics.py +20 -0
  58. tests/fixtures/arch_backend/routes/auth.py +29 -0
  59. tests/fixtures/arch_backend/routes/projects.py +60 -0
  60. tests/fixtures/arch_backend/routes/users.py +55 -0
  61. tests/fixtures/arch_monorepo/apps/api/app.py +30 -0
  62. tests/fixtures/arch_monorepo/apps/api/middleware/__init__.py +0 -0
  63. tests/fixtures/arch_monorepo/apps/api/middleware/auth.py +17 -0
  64. tests/fixtures/arch_monorepo/apps/api/middleware/rate_limit.py +10 -0
  65. tests/fixtures/arch_monorepo/apps/api/routes/__init__.py +0 -0
  66. tests/fixtures/arch_monorepo/apps/api/routes/auth.py +46 -0
  67. tests/fixtures/arch_monorepo/apps/api/routes/invites.py +30 -0
  68. tests/fixtures/arch_monorepo/apps/api/routes/notifications.py +25 -0
  69. tests/fixtures/arch_monorepo/apps/api/routes/projects.py +80 -0
  70. tests/fixtures/arch_monorepo/apps/api/routes/tasks.py +91 -0
  71. tests/fixtures/arch_monorepo/apps/api/routes/users.py +48 -0
  72. tests/fixtures/arch_monorepo/apps/api/services/__init__.py +0 -0
  73. tests/fixtures/arch_monorepo/apps/api/services/email.py +11 -0
  74. tests/fixtures/backend_b/app.py +17 -0
  75. tests/fixtures/fastapi_app/app.py +48 -0
  76. tests/fixtures/fastapi_crossfile/routes.py +18 -0
  77. tests/fixtures/fastapi_crossfile/schemas.py +21 -0
  78. tests/fixtures/flask_app/app.py +33 -0
  79. tests/fixtures/flask_blueprint/app.py +7 -0
  80. tests/fixtures/flask_blueprint/routes/items.py +13 -0
  81. tests/fixtures/flask_blueprint/routes/users.py +20 -0
  82. tests/fixtures/middleware_test_flask/routes/public.py +8 -0
  83. tests/fixtures/middleware_test_flask/routes/users.py +26 -0
  84. tests/fixtures/python_deep_imports/app/__init__.py +0 -0
  85. tests/fixtures/python_deep_imports/app/api/__init__.py +0 -0
  86. tests/fixtures/python_deep_imports/app/api/health.py +11 -0
  87. tests/fixtures/python_deep_imports/app/api/v1/__init__.py +0 -0
  88. tests/fixtures/python_deep_imports/app/api/v1/items.py +18 -0
  89. tests/fixtures/python_deep_imports/app/api/v1/users.py +27 -0
  90. tests/fixtures/python_deep_imports/app/schemas/__init__.py +0 -0
  91. tests/fixtures/python_deep_imports/app/schemas/item.py +13 -0
  92. tests/fixtures/python_deep_imports/app/schemas/user.py +15 -0
  93. tests/fixtures/python_deep_imports/app/shared/__init__.py +0 -0
  94. tests/fixtures/python_deep_imports/app/shared/models.py +7 -0
  95. tests/fixtures/raw_sql_test/app.py +54 -0
  96. tests/test_architecture.py +757 -0
commiter/correlator.py ADDED
@@ -0,0 +1,208 @@
1
+ """Cross-repo correlator: matches frontend API calls to backend endpoints."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from commiter.models import (
8
+ APICall,
9
+ APIEndpoint,
10
+ RepoDocumentation,
11
+ ServiceRelationship,
12
+ )
13
+
14
+
15
def correlate(docs: list[RepoDocumentation]) -> list[ServiceRelationship]:
    """Build the relationship list that links the scanned repositories.

    Three kinds of relationships are produced, in this order:

    1. ``api_call`` — one per API call for which a matching endpoint
       was found among the endpoints of all repos.
    2. Relationships returned by ``_find_shared_dependencies``.
    3. Relationships returned by ``_find_shared_db_tables``.

    Args:
        docs: Documentation objects, one per scanned repository.

    Returns:
        All discovered relationships as a single flat list.
    """
    # Pool endpoints and calls from every repo so matching is cross-repo.
    endpoint_pool: list[APIEndpoint] = [ep for doc in docs for ep in doc.endpoints]
    call_pool: list[APICall] = [c for doc in docs for c in doc.api_calls]

    links = []
    for call in call_pool:
        match = _find_best_endpoint_match(call, endpoint_pool)
        if not match:
            continue
        endpoint, confidence = match
        links.append(
            ServiceRelationship(
                source_repo=call.repo,
                target_repo=endpoint.repo,
                connection_type="api_call",
                source_file=f"{call.file_path}:{call.line}",
                target_endpoint=f"{endpoint.http_method} {endpoint.route_pattern}",
                confidence=confidence,
            )
        )

    # Packages used by more than one repo.
    links += _find_shared_dependencies(docs)
    # Database tables touched by more than one repo.
    links += _find_shared_db_tables(docs)
    return links
49
+
50
+
51
def _find_best_endpoint_match(
    call: APICall, endpoints: list[APIEndpoint]
) -> tuple[APIEndpoint, float] | None:
    """Pick the endpoint whose route is most similar to the call's URL.

    An endpoint is only considered when its HTTP method equals the call's
    method (case-insensitively) or when either side's method is ``ALL``.
    When several endpoints share the top score, the earliest one wins.

    Returns:
        ``(endpoint, confidence)`` for the best candidate, or ``None`` when
        no endpoint scores above zero.
    """
    wanted_path = _normalize_url(call.url_pattern)
    wanted_method = call.http_method.upper()

    top_endpoint: APIEndpoint | None = None
    top_score = 0.0

    for candidate in endpoints:
        candidate_path = _normalize_route(candidate.route_pattern)
        candidate_method = candidate.http_method.upper()

        methods_compatible = (
            wanted_method == candidate_method
            or wanted_method == "ALL"
            or candidate_method == "ALL"
        )
        if not methods_compatible:
            continue

        score = _path_similarity(wanted_path, candidate_path)
        # Strict comparison: zero scores never win and ties keep the first hit.
        if score > top_score:
            top_endpoint, top_score = candidate, score

    if top_endpoint is None:
        return None
    return top_endpoint, top_score
73
+
74
+
75
def _normalize_url(url: str) -> str:
    """Normalize a frontend URL for comparison with backend routes.

    Steps, in order:

    1. Drop a leading ``http(s)://host``.
    2. Drop template expressions at the very start of the URL; these are
       treated as base-URL variables such as ``${API_URL}``.
    3. Turn every remaining ``${...}`` expression into a ``:param`` style
       segment: ``${id}`` becomes ``:id``; expressions that are not a plain
       identifier (``${user.id}``) become ``:param``.
    4. Remove bare ``process.env.NAME`` references.
    5. Strip leading/trailing slashes.

    Previously every ``${...}`` was deleted before the parameter conversion
    ran, so path parameters vanished and ``/users/${id}`` normalized to
    ``users`` instead of ``users/:id``.
    """

    def _as_param(match):
        # Keep simple identifiers as the parameter name; anything more
        # complex only needs to act as a wildcard segment.
        expr = match.group(1).strip()
        return f":{expr}" if re.fullmatch(r"\w+", expr) else ":param"

    # Remove protocol + host
    url = re.sub(r"^https?://[^/]+", "", url)
    # Remove leading base-URL template expressions like ${API_URL}
    url = re.sub(r"^(?:\$\{[^}]+\})+", "", url)
    # Replace remaining template literal params ${id} with :param placeholders
    url = re.sub(r"\$\{([^}]+)\}", _as_param, url)
    # Remove env variable references
    url = re.sub(r"process\.env\.\w+", "", url)
    # Strip leading/trailing slashes
    return url.strip("/")
91
+
92
+
93
def _normalize_route(route: str) -> str:
    """Rewrite a backend route so that every path parameter reads ``:name``.

    Surrounding slashes are stripped first; then each supported parameter
    syntax is rewritten in turn.
    """
    normalized = route.strip("/")
    param_syntaxes = (
        r"<(?:\w+:)?(\w+)>",  # Flask: <param> or <int:param>
        r"\{(\w+)\}",  # FastAPI: {param}
        r"\[(\w+)\]",  # Next.js: [param]
    )
    for pattern in param_syntaxes:
        normalized = re.sub(pattern, r":\1", normalized)
    return normalized
106
+
107
+
108
def _path_similarity(call_path: str, endpoint_path: str) -> float:
    """Score how well a normalized call URL matches a normalized route.

    Returns:
        ``1.0`` when both paths have the same number of segments and every
        segment matches; ``0.7`` when the shorter path matches a contiguous
        run of segments inside the longer one (e.g. the frontend includes an
        ``/api`` prefix the backend route lacks); ``0.0`` otherwise,
        including when either path has no segments.
    """
    call_segments = list(filter(None, call_path.split("/")))
    route_segments = list(filter(None, endpoint_path.split("/")))

    if not (call_segments and route_segments):
        return 0.0

    if len(call_segments) == len(route_segments):
        return 1.0 if _segments_match(call_segments, route_segments) else 0.0

    # Lengths differ: slide the shorter path across the longer one.
    if len(call_segments) <= len(route_segments):
        short, long_ = call_segments, route_segments
    else:
        short, long_ = route_segments, call_segments

    width = len(short)
    windows = (long_[start:start + width] for start in range(len(long_) - width + 1))
    if any(_segments_match(short, window) for window in windows):
        return 0.7  # partial match
    return 0.0
132
+
133
+
134
def _segments_match(a: list[str], b: list[str]) -> bool:
    """Return True when two equally long segment lists agree pairwise.

    A segment that starts with ``:`` is a path parameter and matches any
    segment on the other side. Lists of different length never match.
    """
    if len(a) != len(b):
        return False
    return all(
        left == right or left.startswith(":") or right.startswith(":")
        for left, right in zip(a, b)
    )
144
+
145
+
146
def _find_shared_dependencies(docs: list[RepoDocumentation]) -> list[ServiceRelationship]:
    """Find non-dev packages used by more than one repo.

    Each pair of repos sharing a package yields one ``shared_package``
    relationship with confidence 0.5. Dependencies flagged ``dev_only`` are
    ignored.

    A repo is counted at most once per package. Previously a repo listing
    the same package twice was recorded twice, which produced a relationship
    from the repo to itself and duplicate pairs with other repos.

    Args:
        docs: Documentation objects, one per scanned repository.

    Returns:
        The relationships found; empty when fewer than two docs are given.
    """
    relationships = []
    if len(docs) < 2:
        return relationships

    # Build dep name -> list of distinct repos (first-seen order)
    dep_repos: dict[str, list[str]] = {}
    for doc in docs:
        for dep in doc.dependencies:
            if dep.dev_only:
                continue
            repos = dep_repos.setdefault(dep.name, [])
            if doc.repo_name not in repos:
                repos.append(doc.repo_name)

    for dep_name, repos in dep_repos.items():
        # Emit every unordered pair of repos exactly once.
        for index, source_repo in enumerate(repos):
            for target_repo in repos[index + 1:]:
                relationships.append(ServiceRelationship(
                    source_repo=source_repo,
                    target_repo=target_repo,
                    connection_type="shared_package",
                    source_file=dep_name,
                    target_endpoint=dep_name,
                    confidence=0.5,
                ))

    return relationships
174
+
175
+
176
def _find_shared_db_tables(docs: list[RepoDocumentation]) -> list[ServiceRelationship]:
    """Report database tables that more than one repo operates on.

    Table names are compared case-insensitively. For each table and each
    pair of repos touching it, one ``shared_db`` relationship (confidence
    0.8) is emitted; its ``source_file`` is the first ``file:line`` at which
    the source repo references the table.

    Returns:
        The relationships found; empty when fewer than two docs are given.
    """
    shared = []
    if len(docs) < 2:
        return shared

    # table (lowercase) -> [(repo_name, first file:line reference), ...]
    owners_by_table: dict[str, list[tuple[str, str]]] = {}
    for doc in docs:
        # Only the first reference per table is kept for each repo.
        first_seen: dict[str, str] = {}
        for op in doc.db_operations:
            key = op.table_name.lower()
            if key in first_seen:
                continue
            first_seen[key] = f"{op.file_path}:{op.line}"

        for table, reference in first_seen.items():
            owners_by_table.setdefault(table, []).append((doc.repo_name, reference))

    for table_name, owners in owners_by_table.items():
        if len(owners) < 2:
            continue
        for index, (source_repo, source_ref) in enumerate(owners):
            for target_repo, _target_ref in owners[index + 1:]:
                shared.append(
                    ServiceRelationship(
                        source_repo=source_repo,
                        target_repo=target_repo,
                        connection_type="shared_db",
                        source_file=source_ref,
                        target_endpoint=f"table:{table_name}",
                        confidence=0.8,
                    )
                )

    return shared
File without changes
@@ -0,0 +1,91 @@
1
+ """Extract frontend API calls (fetch, axios, etc.) from component files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from commiter.adapters.react import ReactAdapter
9
+ from commiter.extractors.base import BaseExtractor
10
+ from commiter.models import APICall
11
+ from commiter.parser import get_source, build_constants_from_file
12
+
13
+ if TYPE_CHECKING:
14
+ from tree_sitter import Tree
15
+
16
# Single ReactAdapter instance created at import time and reused by
# APICallExtractor.extract for every file it processes.
_FRONTEND_ADAPTER = ReactAdapter()
17
+
18
+
19
class APICallExtractor(BaseExtractor):
    """Extract outgoing HTTP calls from JavaScript/TypeScript frontend files.

    Call sites are located by the module-level ``_FRONTEND_ADAPTER``; each
    match is converted into an ``APICall`` record.
    """

    name = "api_calls"
    # Class-level default. set_env_vars() rebinds the attribute on the
    # instance instead of mutating this shared dict.
    _env_vars: dict[str, str] = {}

    def set_env_vars(self, env_vars: dict[str, str]) -> None:
        """Set environment variables from .env files (called by scanner)."""
        self._env_vars = env_vars

    def can_handle(self, file_path: str, language: str | None) -> bool:
        """Decide whether *file_path* looks like frontend JS/TS code.

        Returns False for non-JS/TS languages and for backend route files:
        anything under ``pages/api/`` and files named ``route.*``. Otherwise
        returns True when the path contains one of the frontend directory
        hints, or when the language is ``tsx``.
        """
        if language not in ("javascript", "typescript", "tsx"):
            return False
        # Focus on frontend-like files (components, pages, hooks, lib)
        lower = file_path.lower()
        frontend_hints = (
            "components/", "pages/", "src/", "app/", "hooks/",
            "lib/", "utils/", "services/", "features/",
        )
        # Exclude API route files (backend). The file name never contains a
        # slash, so it is compared by prefix; relative paths that start
        # with "pages/api/" are covered explicitly.
        in_pages_api = lower.startswith("pages/api/") or "/pages/api/" in lower
        is_route_file = Path(file_path).name.lower().startswith("route.")
        if in_pages_api or is_route_file:
            return False
        return any(hint in lower for hint in frontend_hints) or language in ("tsx",)

    def extract(self, file_path: str, repo_name: str, tree: Tree | None = None, language: str | None = None) -> list[APICall]:
        """Return one ``APICall`` per call site the adapter finds in the file.

        Returns an empty list when no parse tree is supplied.
        """
        if tree is None:
            return []

        source = get_source(file_path)

        # Build per-file constants for URL resolution
        constants = build_constants_from_file(tree.root_node, source, self._env_vars)
        matches = _FRONTEND_ADAPTER.find_api_calls(tree, source, file_path=file_path, constants=constants)

        # Derive component/page name from file path
        component_name = self._infer_component_name(file_path)

        return [
            APICall(
                repo=repo_name,
                file_path=file_path,
                line=match.line,
                component_or_page=component_name,
                http_method=match.http_method,
                url_pattern=match.url_pattern,
                client_library=self._detect_library(match),
                response_type=match.response_type,
                body_type=match.body_type,
            )
            for match in matches
        ]

    def _infer_component_name(self, file_path: str) -> str:
        """Derive a component/page name from the file path.

        ``index.*`` and ``page.*`` files are named after their parent
        directory; every other file is named after its stem.
        """
        path = Path(file_path)
        if path.stem in ("index", "page"):
            return path.parent.name
        return path.stem

    def _detect_library(self, match) -> str:
        """Guess which HTTP client produced *match*.

        Returns ``"axios"`` when the raw bytes of the matched call node
        contain ``axios``; otherwise ``"fetch"``. A missing node, missing
        text, or non-bytes text all fall back to ``"fetch"``.
        """
        call_node = getattr(match, "call_node", None)
        raw = getattr(call_node, "text", None)
        if isinstance(raw, (bytes, bytearray)) and b"axios" in raw:
            return "axios"
        return "fetch"
@@ -0,0 +1,354 @@
1
+ """Extract API endpoint definitions from backend files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from commiter.adapters.base import BaseAdapter
9
+ from commiter.adapters.flask import FlaskAdapter
10
+ from commiter.adapters.fastapi import FastAPIAdapter
11
+ from commiter.adapters.express import ExpressAdapter
12
+ from commiter.adapters.nextjs import NextJSAdapter
13
+ from commiter.adapters.django_rest import DjangoRESTAdapter
14
+ from commiter.extractors.base import BaseExtractor
15
+ from commiter.models import APIEndpoint, Param, ParamSource
16
+ from commiter.parser import (
17
+ get_source, find_ts_interface_fields, find_ts_enum_declarations,
18
+ find_ts_type_aliases, find_ts_const_objects, find_py_class_fields,
19
+ parse_generic_type, TypeField,
20
+ )
21
+ from commiter.generic_resolver import resolve_generic_type
22
+
23
+ if TYPE_CHECKING:
24
+ from tree_sitter import Tree
25
+
26
+ # Map framework names to adapter instances
27
+ _ADAPTERS: dict[str, BaseAdapter] = {
28
+ "flask": FlaskAdapter(),
29
+ "fastapi": FastAPIAdapter(),
30
+ "express": ExpressAdapter(),
31
+ "nextjs": NextJSAdapter(),
32
+ "django-rest": DjangoRESTAdapter(),
33
+ }
34
+
35
+
36
def _register_adapter(name: str, adapter: BaseAdapter) -> None:
    """Store *adapter* in the module registry under *name*.

    An existing entry with the same name is replaced.
    """
    _ADAPTERS.update({name: adapter})
39
+
40
+
41
def get_adapters() -> dict[str, BaseAdapter]:
    """Return a shallow copy of the adapter registry.

    Mutating the returned dict does not affect the registry itself; the
    adapter instances are shared.
    """
    return _ADAPTERS.copy()
44
+
45
+
46
class APIEndpointExtractor(BaseExtractor):
    """Extract API endpoint definitions from backend source files.

    Each registered adapter whose language and framework fit the file is
    asked for route definitions; every route becomes an ``APIEndpoint``
    enriched with prefix, parameter, body/response, auth and middleware
    information. The optional cross-file indexes are injected through the
    ``set_*_index`` methods.
    """

    name = "api_endpoints"

    def __init__(self) -> None:
        # Optional cross-file indexes; each stays None until its setter is called.
        self._type_index = None
        self._prefix_index = None
        self._middleware_index = None
        self._handler_index = None

    def set_type_index(self, type_index) -> None:
        """Set the cross-file type index (called by scanner after Pass 1)."""
        self._type_index = type_index

    def set_prefix_index(self, prefix_index) -> None:
        """Set the cross-file prefix index (called by scanner after Pass 1)."""
        self._prefix_index = prefix_index

    def set_middleware_index(self, middleware_index) -> None:
        """Set the cross-file middleware index (called by scanner after Pass 1)."""
        self._middleware_index = middleware_index

    def set_handler_index(self, handler_index) -> None:
        """Set the cross-file handler type index (called by scanner after Pass 1)."""
        self._handler_index = handler_index

    def can_handle(self, file_path: str, language: str | None) -> bool:
        """Return True for the languages that have adapters (Python and the JS family)."""
        if language is None:
            return False
        # Only handle languages we have adapters for
        return language in ("python", "javascript", "typescript", "tsx")

    @staticmethod
    def _accepts_type_definitions(method) -> bool:
        """Return True when *method* declares a ``type_definitions`` parameter.

        The signature is inspected rather than ``__code__.co_varnames``,
        because ``co_varnames`` also lists local variables and could report
        a parameter that does not exist.
        """
        import inspect  # local import: leaves the module's import block untouched

        try:
            return "type_definitions" in inspect.signature(method).parameters
        except (TypeError, ValueError):
            # Not introspectable — fall back to the basic call form.
            return False

    def extract(self, file_path: str, repo_name: str, tree: Tree | None = None, language: str | None = None) -> list[APIEndpoint]:
        """Return the endpoints defined in *file_path*.

        Returns an empty list when no parse tree or language is supplied.
        """
        if tree is None or language is None:
            return []

        source = get_source(file_path)
        endpoints = []

        # Build type definitions for resolution (same-file + cross-file fallback)
        type_defs = self._build_type_definitions(tree, source, language, file_path)

        # Detect which framework the file actually uses (from imports/requires)
        file_frameworks = self._detect_file_framework(source, language)

        # Languages that JS adapters should also handle
        js_family = {"javascript", "typescript", "tsx"}

        # Try each adapter that matches the language
        for adapter in _ADAPTERS.values():
            # Match adapter language to file language
            adapter_matches = (
                adapter.language == language
                or (adapter.language in js_family and language in js_family)
            )
            if not adapter_matches:
                continue

            # Skip adapters that don't match the file's actual framework
            if file_frameworks is not None and adapter.framework_name not in file_frameworks:
                continue

            # NextJS adapter needs file_path for convention-based routing
            if isinstance(adapter, NextJSAdapter):
                routes = adapter.find_route_definitions(tree, source, file_path=file_path)
            else:
                routes = adapter.find_route_definitions(tree, source)

            # Determined once per adapter: does it take type definitions?
            pass_type_defs = self._accepts_type_definitions(adapter.extract_request_body_fields)

            for route in routes:
                # Resolve prefix from blueprint/router mounting
                if self._prefix_index and route.router_var:
                    prefix = self._prefix_index.get_prefix_for_file(file_path, route.router_var)
                    if prefix:
                        route.route_pattern = _join_prefix(prefix, route.route_pattern)

                # Extract details, passing type definitions for resolution
                path_param_names = adapter.extract_path_params(route.route_pattern)
                auth = adapter.extract_auth_info(route, source)

                if pass_type_defs:
                    body_fields = adapter.extract_request_body_fields(route, source, type_definitions=type_defs)
                else:
                    body_fields = adapter.extract_request_body_fields(route, source)

                response_fields = adapter.extract_response_fields(route, source)

                params = [
                    Param(name=p, source=ParamSource.PATH, type_hint=route.param_types.get(p))
                    for p in path_param_names
                ]

                # Pick up type names from route.param_types (set by adapters)
                request_body_type = route.param_types.get("_request_body_type")
                response_type = route.param_types.get("_response_type")

                # For class-based handlers: look up body type from handler index
                if not request_body_type and self._handler_index and "." in route.handler_name:
                    request_body_type = self._handler_index.get_body_type(route.handler_name)
                    # If we found a body type, also resolve its fields
                    if request_body_type and request_body_type in type_defs:
                        body_fields = []
                        for tf in type_defs[request_body_type]:
                            opt = "?" if tf.optional else ""
                            body_fields.append(f"{tf.name}: {tf.type_str}{opt}")

                # Query middleware index for middleware covering this route
                mw = []
                if self._middleware_index:
                    mw = self._middleware_index.get_middleware_for_route(
                        file_path, route.route_pattern, route.router_var, route.line,
                    )

                endpoints.append(APIEndpoint(
                    repo=repo_name,
                    file_path=file_path,
                    line=route.line,
                    http_method=route.http_method,
                    route_pattern=route.route_pattern,
                    handler_name=route.handler_name,
                    framework=adapter.framework_name,
                    parameters=params,
                    request_body_fields=body_fields,
                    response_fields=response_fields,
                    auth_decorators=auth,
                    request_body_type=request_body_type,
                    response_type=response_type,
                    middleware=mw,
                ))

        return endpoints

    @staticmethod
    def _detect_file_framework(source: bytes, language: str) -> set[str] | None:
        """Detect which framework a file uses based on its text.

        Returns:
            A set of framework names; an empty set when a JS/TS file shows
            no backend indicators (so no backend adapter runs); or ``None``
            when nothing can be determined and all adapters are allowed.
        """
        text = source.decode("utf-8", errors="replace")

        if language == "python":
            frameworks = set()
            if "from flask" in text or "import flask" in text:
                frameworks.add("flask")
            if "from fastapi" in text or "import fastapi" in text:
                frameworks.add("fastapi")
            if "from django" in text or "from rest_framework" in text:
                frameworks.add("django-rest")
            return frameworks if frameworks else None

        if language in ("javascript", "typescript", "tsx"):
            frameworks = set()
            if "express" in text and ("require(" in text or "from " in text):
                frameworks.add("express")
            # NOTE(review): plain substring checks on the file's text — "next"
            # also matches identifiers such as next() or nextPage.
            if "next" in text or "/pages/api/" in text or "/app/" in text:
                frameworks.add("nextjs")
            # If we found any backend indicators, return them; if none found,
            # return empty set to prevent backend adapters from matching frontend files
            if frameworks:
                return frameworks
            # Check if this looks like a backend file at all (has route-like patterns)
            has_route_patterns = ("router." in text or "app." in text) and ("require" in text or "express" in text)
            if not has_route_patterns:
                return set()  # empty = no backend adapters should run
            return None

        return None

    def _build_type_definitions(self, tree, source: bytes, language: str, file_path: str) -> dict[str, list[TypeField]]:
        """Build a dict of type definitions for resolution.

        Same-file definitions are collected first: for TypeScript/TSX
        interfaces, enums, type aliases and const objects; for Python,
        classes whose base list mentions BaseModel, Schema or TypedDict.
        When a TypeIndex has been set, the result is wrapped so that
        missing names fall back to cross-file lookups.
        """
        type_defs: dict[str, list[TypeField]] = {}

        if language in ("typescript", "tsx"):
            type_defs = find_ts_interface_fields(tree.root_node, source)
            type_defs.update(find_ts_enum_declarations(tree.root_node, source))
            type_defs.update(find_ts_type_aliases(tree.root_node, source))
            type_defs.update(find_ts_const_objects(tree.root_node, source))
        elif language == "python":
            import re
            text = source.decode("utf-8", errors="replace")
            for match in re.finditer(r'class\s+(\w+)\s*\([^)]*(?:BaseModel|Schema|TypedDict)', text):
                class_name = match.group(1)
                fields = find_py_class_fields(tree.root_node, source, class_name)
                if fields:
                    type_defs[class_name] = fields

        # Wrap with cross-file fallback if TypeIndex is available
        if self._type_index is not None:
            return _TypeDefsWithFallback(type_defs, self._type_index, file_path)

        return type_defs
239
+
240
+
241
class _TypeDefsWithFallback(dict):
    """Dict of same-file type definitions that falls back to a TypeIndex.

    Lookups via ``in``, ``[]`` and ``get()`` first consult the local
    definitions, then the cross-file index, and finally try to resolve
    generic type expressions (names containing ``<`` or ending in ``[]``).
    Field lists returned by ``[]``/``get()`` have their ``__type_ref__`` and
    ``__generic__`` marker fields expanded.
    """

    def __init__(self, local_defs: dict, type_index, file_path: str):
        super().__init__(local_defs)
        self._type_index = type_index
        self._file_path = file_path
        # Generic type strings currently being resolved. Generic resolution
        # re-enters __getitem__, which restarts _resolve_type_refs at depth
        # 0, so the depth guard alone cannot stop mutually recursive aliases.
        self._resolving = set()

    def __bool__(self):
        # Always truthy so `if type_definitions and ...` doesn't short-circuit
        return True

    @staticmethod
    def _looks_generic(key) -> bool:
        """Return True for strings such as ``Pick<User, "id">`` or ``User[]``."""
        return isinstance(key, str) and ("<" in key or key.endswith("[]"))

    def __contains__(self, key):
        if super().__contains__(key):
            return True
        # Check cross-file index
        if self._type_index.resolve(key, None, self._file_path) is not None:
            return True
        # Check if it's a generic type we can resolve
        if self._looks_generic(key):
            return self._try_generic_resolve(key) is not None
        return False

    def __getitem__(self, key):
        try:
            fields = super().__getitem__(key)
        except KeyError:
            typedef = self._type_index.resolve(key, None, self._file_path)
            if typedef is not None:
                fields = typedef.fields
            else:
                # Try generic resolution: Pick<User, "id">, Partial<User>, etc.
                resolved = self._try_generic_resolve(key) if self._looks_generic(key) else None
                if resolved is None:
                    raise
                return self._resolve_type_refs(resolved)
        # Resolve any __type_ref__ markers (from intersections/unions)
        return self._resolve_type_refs(fields)

    def get(self, key, default=None):
        """Like ``dict.get`` but with the same fallbacks as ``self[key]``.

        ``dict.get`` does not call ``__getitem__``, so without this override
        ``get()`` would only ever see the same-file definitions.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def _try_generic_resolve(self, type_str: str) -> list | None:
        """Try to resolve a generic type like Pick<User, "id"> to fields.

        Returns None when the type cannot be resolved, or when *type_str*
        is already being resolved further up the call stack (a cycle).
        """
        if type_str in self._resolving:
            return None

        def type_lookup(name):
            try:
                return list(self[name])  # recursive — uses our own __getitem__
            except KeyError:
                return None

        def generic_def_lookup(name):
            """Look up a generic type definition and return (fields, generic_params)."""
            # generic_params come from the type index even for local definitions.
            typedef = self._type_index.resolve(name, None, self._file_path)
            try:
                fields = list(dict.__getitem__(self, name))
            except KeyError:
                if typedef:
                    return (list(typedef.fields), typedef.generic_params)
                return None
            return (fields, typedef.generic_params if typedef else [])

        self._resolving.add(type_str)
        try:
            return resolve_generic_type(type_str, type_lookup, generic_def_lookup)
        finally:
            self._resolving.discard(type_str)

    def _resolve_type_refs(self, fields: list, depth: int = 0) -> list:
        """Recursively resolve __type_ref__ and __generic__ markers to actual fields.

        Markers that cannot be resolved are kept as-is. Expansion stops once
        *depth* exceeds 5.
        """
        if depth > 5:
            return fields  # guard against circular refs

        resolved = []
        for field in fields:
            if field.type_str == "__type_ref__":
                ref_fields = self._lookup_type(field.name)
                if ref_fields:
                    resolved.extend(self._resolve_type_refs(ref_fields, depth + 1))
                else:
                    resolved.append(field)
            elif field.type_str == "__generic__" and field.value:
                # Generic type alias: Pick<User, "id">, Partial<User>, ApiResponse<User>
                generic_fields = self._try_generic_resolve(field.value)
                if generic_fields:
                    resolved.extend(self._resolve_type_refs(generic_fields, depth + 1))
                else:
                    resolved.append(field)
            else:
                resolved.append(field)
        return resolved

    def _lookup_type(self, type_name: str) -> list | None:
        """Look up a type by name in local defs or cross-file index."""
        # Try local first
        try:
            return list(super().__getitem__(type_name))
        except KeyError:
            pass
        # Try cross-file
        typedef = self._type_index.resolve(type_name, None, self._file_path)
        if typedef is not None:
            return list(typedef.fields)
        return None
347
+
348
+
349
def _join_prefix(prefix: str, route: str) -> str:
    """Concatenate a mount prefix and a route with exactly one slash between them.

    Trailing slashes are removed from *prefix*; *route* gains a leading
    slash when it lacks one.
    """
    suffix = route if route.startswith("/") else "/" + route
    return prefix.rstrip("/") + suffix
@@ -0,0 +1,33 @@
1
+ """Classify files as backend, frontend, config, test, etc."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from commiter.extractors.base import BaseExtractor
8
+ from commiter.models import FileClassification
9
+ from commiter.utils.file_classifier import classify_file
10
+
11
+ if TYPE_CHECKING:
12
+ from tree_sitter import Tree
13
+
14
+
15
class BackendFileExtractor(BaseExtractor):
    """Placeholder extractor for file-role classification.

    The scanner already classifies files, so this extractor accepts every
    file but currently emits nothing. It is kept as the place where
    AST-based hints could later enrich that classification (for example,
    noticing that a .py file imports Flask and is therefore a backend file).
    """

    name = "backend_files"

    def can_handle(self, file_path: str, language: str | None) -> bool:
        """Accept every file; classification is not limited by path or language."""
        return True

    def extract(self, file_path: str, repo_name: str, tree: Tree | None = None, language: str | None = None) -> list[FileClassification]:
        """Return no classifications.

        The scanner handles basic classification; returning an empty list
        avoids producing duplicates.
        """
        no_results: list[FileClassification] = []
        return no_results
+ return []