commiter-cli 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- commiter/__init__.py +3 -0
- commiter/adapters/__init__.py +0 -0
- commiter/adapters/base.py +96 -0
- commiter/adapters/django_rest.py +247 -0
- commiter/adapters/express.py +204 -0
- commiter/adapters/fastapi.py +170 -0
- commiter/adapters/flask.py +169 -0
- commiter/adapters/nextjs.py +180 -0
- commiter/adapters/prisma.py +76 -0
- commiter/adapters/raw_sql.py +191 -0
- commiter/adapters/react.py +129 -0
- commiter/adapters/sqlalchemy.py +99 -0
- commiter/adapters/supabase.py +68 -0
- commiter/auth.py +130 -0
- commiter/cli.py +667 -0
- commiter/correlator.py +208 -0
- commiter/extractors/__init__.py +0 -0
- commiter/extractors/api_calls.py +91 -0
- commiter/extractors/api_endpoints.py +354 -0
- commiter/extractors/backend_files.py +33 -0
- commiter/extractors/base.py +40 -0
- commiter/extractors/db_operations.py +69 -0
- commiter/extractors/dependencies.py +219 -0
- commiter/generic_resolver.py +204 -0
- commiter/handler_index.py +97 -0
- commiter/lib.py +63 -0
- commiter/middleware_index.py +350 -0
- commiter/models.py +117 -0
- commiter/parser.py +1283 -0
- commiter/prefix_index.py +211 -0
- commiter/report/__init__.py +0 -0
- commiter/report/ai.py +120 -0
- commiter/report/api_guide.py +217 -0
- commiter/report/architecture.py +930 -0
- commiter/report/console.py +254 -0
- commiter/report/json_output.py +122 -0
- commiter/report/markdown.py +163 -0
- commiter/scanner.py +383 -0
- commiter/type_index.py +304 -0
- commiter/uploader.py +46 -0
- commiter/utils/__init__.py +0 -0
- commiter/utils/env_reader.py +78 -0
- commiter/utils/file_classifier.py +187 -0
- commiter/utils/path_helpers.py +73 -0
- commiter/utils/tsconfig_resolver.py +281 -0
- commiter/wrapper_index.py +288 -0
- commiter_cli-0.3.0.dist-info/METADATA +14 -0
- commiter_cli-0.3.0.dist-info/RECORD +96 -0
- commiter_cli-0.3.0.dist-info/WHEEL +5 -0
- commiter_cli-0.3.0.dist-info/entry_points.txt +2 -0
- commiter_cli-0.3.0.dist-info/top_level.txt +2 -0
- tests/__init__.py +0 -0
- tests/fixtures/arch_backend/app.py +22 -0
- tests/fixtures/arch_backend/middleware/__init__.py +0 -0
- tests/fixtures/arch_backend/middleware/rate_limit.py +4 -0
- tests/fixtures/arch_backend/routes/__init__.py +0 -0
- tests/fixtures/arch_backend/routes/analytics.py +20 -0
- tests/fixtures/arch_backend/routes/auth.py +29 -0
- tests/fixtures/arch_backend/routes/projects.py +60 -0
- tests/fixtures/arch_backend/routes/users.py +55 -0
- tests/fixtures/arch_monorepo/apps/api/app.py +30 -0
- tests/fixtures/arch_monorepo/apps/api/middleware/__init__.py +0 -0
- tests/fixtures/arch_monorepo/apps/api/middleware/auth.py +17 -0
- tests/fixtures/arch_monorepo/apps/api/middleware/rate_limit.py +10 -0
- tests/fixtures/arch_monorepo/apps/api/routes/__init__.py +0 -0
- tests/fixtures/arch_monorepo/apps/api/routes/auth.py +46 -0
- tests/fixtures/arch_monorepo/apps/api/routes/invites.py +30 -0
- tests/fixtures/arch_monorepo/apps/api/routes/notifications.py +25 -0
- tests/fixtures/arch_monorepo/apps/api/routes/projects.py +80 -0
- tests/fixtures/arch_monorepo/apps/api/routes/tasks.py +91 -0
- tests/fixtures/arch_monorepo/apps/api/routes/users.py +48 -0
- tests/fixtures/arch_monorepo/apps/api/services/__init__.py +0 -0
- tests/fixtures/arch_monorepo/apps/api/services/email.py +11 -0
- tests/fixtures/backend_b/app.py +17 -0
- tests/fixtures/fastapi_app/app.py +48 -0
- tests/fixtures/fastapi_crossfile/routes.py +18 -0
- tests/fixtures/fastapi_crossfile/schemas.py +21 -0
- tests/fixtures/flask_app/app.py +33 -0
- tests/fixtures/flask_blueprint/app.py +7 -0
- tests/fixtures/flask_blueprint/routes/items.py +13 -0
- tests/fixtures/flask_blueprint/routes/users.py +20 -0
- tests/fixtures/middleware_test_flask/routes/public.py +8 -0
- tests/fixtures/middleware_test_flask/routes/users.py +26 -0
- tests/fixtures/python_deep_imports/app/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/api/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/api/health.py +11 -0
- tests/fixtures/python_deep_imports/app/api/v1/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/api/v1/items.py +18 -0
- tests/fixtures/python_deep_imports/app/api/v1/users.py +27 -0
- tests/fixtures/python_deep_imports/app/schemas/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/schemas/item.py +13 -0
- tests/fixtures/python_deep_imports/app/schemas/user.py +15 -0
- tests/fixtures/python_deep_imports/app/shared/__init__.py +0 -0
- tests/fixtures/python_deep_imports/app/shared/models.py +7 -0
- tests/fixtures/raw_sql_test/app.py +54 -0
- tests/test_architecture.py +757 -0
commiter/scanner.py
ADDED
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
"""Orchestrator: discovers files, runs extractors, assembles documentation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from commiter.extractors.base import BaseExtractor
|
|
10
|
+
from commiter.extractors.dependencies import DependencyExtractor
|
|
11
|
+
from commiter.models import RepoDocumentation, APIEndpoint, APICall, Dependency, FileClassification, DBOperation
|
|
12
|
+
from commiter.parser import (
|
|
13
|
+
parse_file, get_source, find_js_imports,
|
|
14
|
+
find_nodes_by_type, node_text, resolve_url_from_node, build_constants_from_file,
|
|
15
|
+
)
|
|
16
|
+
from commiter.utils.env_reader import load_env_files
|
|
17
|
+
from commiter.utils.file_classifier import classify_file, detect_repo_frameworks
|
|
18
|
+
from commiter.utils.path_helpers import walk_repo, detect_repo_name
|
|
19
|
+
from commiter.utils.tsconfig_resolver import TSConfigRegistry
|
|
20
|
+
from commiter.handler_index import HandlerIndex
|
|
21
|
+
from commiter.middleware_index import MiddlewareIndex
|
|
22
|
+
from commiter.prefix_index import PrefixIndex
|
|
23
|
+
from commiter.type_index import TypeIndex
|
|
24
|
+
from commiter.wrapper_index import WrapperIndex
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ScanResult:
    """Extended scan output that includes indexes needed by the architecture format."""
    # The assembled documentation for the scanned repository.
    doc: RepoDocumentation
    # Cross-file type/interface definitions collected during Pass 1.
    type_index: TypeIndex
    # Middleware registrations collected during Pass 1.
    middleware_index: MiddlewareIndex
    # Every file path visited by the scan, after exclusions.
    file_list: list[str] = field(default_factory=list)
    # tsconfig/jsconfig path-alias resolver, when one could be loaded.
    alias_resolver: TSConfigRegistry | None = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _default_extractors() -> list[BaseExtractor]:
    """Return all available extractors.

    DependencyExtractor is always present (it is imported unconditionally at
    module level); the remaining extractors are optional and are loaded
    dynamically so a partially-installed package still works.

    Returns:
        A list of instantiated extractor objects.
    """
    import importlib

    extractors: list[BaseExtractor] = [
        DependencyExtractor(),
    ]

    # Optional extractors: (module path, class name). Any that cannot be
    # imported (e.g. not shipped yet) are silently skipped, matching the
    # previous per-extractor try/except ImportError behavior.
    optional_extractors = [
        ("commiter.extractors.api_endpoints", "APIEndpointExtractor"),
        ("commiter.extractors.api_calls", "APICallExtractor"),
        ("commiter.extractors.backend_files", "BackendFileExtractor"),
        ("commiter.extractors.db_operations", "DBOperationExtractor"),
    ]
    for module_name, class_name in optional_extractors:
        try:
            module = importlib.import_module(module_name)
            extractor_cls = getattr(module, class_name)
        except (ImportError, AttributeError):
            # AttributeError mirrors the ImportError raised by
            # `from module import Name` when Name is missing.
            continue
        extractors.append(extractor_cls())

    return extractors
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _scan_repo_full(
    repo_root: str,
    extra_excludes: list[str] | None = None,
    extractors: list[BaseExtractor] | None = None,
) -> ScanResult:
    """Scan a single repository and return documentation with indexes.

    This is the internal implementation that preserves indexes for consumers
    like the architecture output format.

    Args:
        repo_root: Path to the repository root.
        extra_excludes: Additional path patterns to exclude from the file walk.
        extractors: Override the default set of extractors (defaults to
            ``_default_extractors()``).

    Returns:
        A ScanResult bundling the assembled RepoDocumentation with the
        type/middleware indexes, the visited file list, and the alias resolver.
    """
    repo_name = detect_repo_name(repo_root)
    frameworks = detect_repo_frameworks(repo_root)
    files = walk_repo(repo_root, extra_excludes)

    # Load tsconfig/jsconfig for path alias resolution
    alias_resolver = TSConfigRegistry(repo_root)

    # Load .env files for environment variable resolution
    env_vars = load_env_files(repo_root)

    if extractors is None:
        extractors = _default_extractors()

    doc = RepoDocumentation(
        repo_name=repo_name,
        repo_path=repo_root,
        frameworks=frameworks,
    )

    languages_seen: set[str] = set()
    js_family = {"javascript", "typescript", "tsx"}

    # Cache parse results to avoid double-parsing JS/TS files
    parse_cache: dict[str, tuple] = {}  # file_path -> (tree, language, source)

    # === Pass 1: build wrapper index, type index, and prefix index ===
    # Only files that tree-sitter can parse enter parse_cache; everything else
    # is re-checked (and classified) in Pass 2.
    wrapper_idx = WrapperIndex(alias_resolver=alias_resolver, env_vars=env_vars)
    type_idx = TypeIndex(alias_resolver=alias_resolver)
    prefix_idx = PrefixIndex()
    middleware_idx = MiddlewareIndex()
    handler_idx = HandlerIndex()
    for file_path in files:
        parse_result = parse_file(file_path)
        if parse_result:
            tree, lang = parse_result
            source = get_source(file_path)
            parse_cache[file_path] = (tree, lang, source)
            # Index wrappers (JS/TS only)
            if lang in js_family:
                wrapper_idx.index_file(file_path, tree, source)
            # Index type definitions (all parseable languages)
            type_idx.index_file(file_path, tree, source, lang)
            # Index router/blueprint prefixes
            prefix_idx.index_file(file_path, tree, source, lang)
            # Index middleware registrations
            middleware_idx.index_file(file_path, tree, source, lang)
            # Index class handler method signatures
            handler_idx.index_file(file_path, tree, source, lang)

    # Resolve chained wrappers (functions that call other wrapper functions)
    wrapper_idx.resolve_chains()

    # Make indexes and env vars available to extractors.
    # hasattr-based duck typing lets extractors opt into only the indexes
    # they actually consume.
    for extractor in extractors:
        if hasattr(extractor, 'set_type_index'):
            extractor.set_type_index(type_idx)
        if hasattr(extractor, 'set_prefix_index'):
            extractor.set_prefix_index(prefix_idx)
        if hasattr(extractor, 'set_middleware_index'):
            extractor.set_middleware_index(middleware_idx)
        if hasattr(extractor, 'set_handler_index'):
            extractor.set_handler_index(handler_idx)
        if hasattr(extractor, 'set_env_vars'):
            extractor.set_env_vars(env_vars)

    # === Pass 2: run extractors + resolve wrapper calls ===
    for file_path in files:
        # Classify the file
        classification = classify_file(file_path, repo_root)
        doc.file_classifications.append(classification)
        if classification.language != "unknown":
            languages_seen.add(classification.language)

        # Use cached parse result if available, otherwise parse now
        if file_path in parse_cache:
            tree, language, source = parse_cache[file_path]
        else:
            parse_result = parse_file(file_path)
            tree = parse_result[0] if parse_result else None
            language = parse_result[1] if parse_result else None

        # Run each extractor
        for extractor in extractors:
            if not extractor.can_handle(file_path, language):
                continue

            artifacts = extractor.extract(file_path, repo_name, tree=tree, language=language)
            # Route each artifact into the matching documentation bucket.
            for artifact in artifacts:
                if isinstance(artifact, APIEndpoint):
                    doc.endpoints.append(artifact)
                elif isinstance(artifact, APICall):
                    doc.api_calls.append(artifact)
                elif isinstance(artifact, Dependency):
                    doc.dependencies.append(artifact)
                elif isinstance(artifact, FileClassification):
                    pass  # already added above
                elif isinstance(artifact, DBOperation):
                    doc.db_operations.append(artifact)

        # Resolve wrapper function calls in JS/TS files
        if language in js_family and tree is not None:
            if file_path not in parse_cache:
                source = get_source(file_path)
            else:
                source = parse_cache[file_path][2]

            caller_constants = build_constants_from_file(tree.root_node, source, env_vars)
            imports = find_js_imports(tree.root_node, source)
            for imp in imports:
                for name in imp.names:
                    wrappers = wrapper_idx.resolve(name, imp.module_path, file_path)
                    if not wrappers:
                        continue
                    for wrapper in wrappers:
                        # Try to find actual call sites with resolved arguments
                        call_sites = _find_wrapper_call_urls(
                            tree.root_node, source, name, caller_constants,
                        )
                        if call_sites:
                            # Emit one API call per call site with the resolved URL.
                            # NOTE(review): every call site reuses the HTTP method of the
                            # wrapper's *first* recorded api_call — confirm wrappers that
                            # issue multiple methods are acceptable here.
                            method = wrapper.api_calls[0].http_method if wrapper.api_calls else "GET"
                            for call_line, call_url in call_sites:
                                doc.api_calls.append(APICall(
                                    repo=repo_name,
                                    file_path=file_path,
                                    line=call_line,
                                    component_or_page=Path(file_path).stem,
                                    http_method=method,
                                    url_pattern=call_url,
                                    client_library="fetch",
                                    traced_from=f"{name}() in {Path(wrapper.file_path).name}:{wrapper.line}",
                                ))
                        else:
                            # Fallback: use wrapper definition URLs (existing behavior)
                            for api_call in wrapper.api_calls:
                                doc.api_calls.append(APICall(
                                    repo=repo_name,
                                    file_path=file_path,
                                    line=imp.line,
                                    component_or_page=Path(file_path).stem,
                                    http_method=api_call.http_method,
                                    url_pattern=api_call.url_pattern,
                                    client_library="fetch",
                                    traced_from=f"{name}() in {Path(wrapper.file_path).name}:{wrapper.line}",
                                ))

    doc.languages = sorted(languages_seen)

    # Post-processing: link DB tables to endpoints
    _link_db_to_endpoints(doc)

    return ScanResult(
        doc=doc,
        type_index=type_idx,
        middleware_index=middleware_idx,
        file_list=files,
        alias_resolver=alias_resolver,
    )
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def scan_repo(
    repo_root: str,
    extra_excludes: list[str] | None = None,
    extractors: list[BaseExtractor] | None = None,
) -> RepoDocumentation:
    """Scan a single repository and return its documentation.

    Public convenience wrapper around the full scan: runs the two-pass scan
    and discards the index objects, keeping only the assembled documentation.

    Args:
        repo_root: Path to the repository root.
        extra_excludes: Additional path patterns to exclude.
        extractors: Override the default set of extractors.
    """
    scan_result = _scan_repo_full(repo_root, extra_excludes, extractors)
    return scan_result.doc
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _find_wrapper_call_urls(root_node, source, func_name, constants):
    """Locate call sites of a wrapper function and resolve their URL argument.

    Scans every call expression in the tree; for each call whose callee text
    equals *func_name*, the first string-like argument is treated as the URL
    and resolved against *constants*.

    Returns a list of (line_number, resolved_url) tuples, one per call site
    whose URL argument could be resolved.
    """
    url_arg_types = ('string', 'template_string', 'binary_expression')
    call_sites = []

    for call_node in find_nodes_by_type(root_node, 'call_expression'):
        callee = call_node.child_by_field_name('function')
        if callee is None or node_text(callee, source) != func_name:
            continue

        arg_list = call_node.child_by_field_name('arguments')
        if arg_list is None:
            continue

        # Only the first string-like argument is considered; later arguments
        # (options objects, callbacks) are ignored.
        candidate = next(
            (a for a in arg_list.children if a.type in url_arg_types),
            None,
        )
        if candidate is None:
            continue

        resolved = resolve_url_from_node(candidate, source, constants)
        if resolved:
            call_sites.append((call_node.start_point[0] + 1, resolved))

    return call_sites
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
_UNRESOLVED_URL_RE = re.compile(r':\d+:\w+|(?<![/]):[a-z]{2,}$')
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _is_unresolved_url(url: str) -> bool:
|
|
290
|
+
"""Check if a URL contains unresolved function parameter placeholders.
|
|
291
|
+
|
|
292
|
+
Detects patterns like 'http://localhost:5000:path' where :path is a
|
|
293
|
+
function parameter that wasn't resolved, as opposed to route params
|
|
294
|
+
like ':id' or ':userId' which appear after a slash.
|
|
295
|
+
"""
|
|
296
|
+
return bool(_UNRESOLVED_URL_RE.search(url))
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _link_db_to_endpoints(doc: RepoDocumentation) -> None:
|
|
300
|
+
"""Associate DB table names with the endpoints whose handler contains them.
|
|
301
|
+
|
|
302
|
+
Uses line ranges: a DB operation belongs to an endpoint if it falls between
|
|
303
|
+
that endpoint's line and the next endpoint's start line in the same file.
|
|
304
|
+
Falls back to same-file grouping for files with only one endpoint.
|
|
305
|
+
"""
|
|
306
|
+
ops_by_file: dict[str, list[DBOperation]] = {}
|
|
307
|
+
for op in doc.db_operations:
|
|
308
|
+
ops_by_file.setdefault(op.file_path, []).append(op)
|
|
309
|
+
|
|
310
|
+
# Group endpoints by file and sort by line number
|
|
311
|
+
endpoints_by_file: dict[str, list[APIEndpoint]] = {}
|
|
312
|
+
for ep in doc.endpoints:
|
|
313
|
+
endpoints_by_file.setdefault(ep.file_path, []).append(ep)
|
|
314
|
+
|
|
315
|
+
for file_path, file_endpoints in endpoints_by_file.items():
|
|
316
|
+
file_ops = ops_by_file.get(file_path, [])
|
|
317
|
+
if not file_ops:
|
|
318
|
+
continue
|
|
319
|
+
|
|
320
|
+
sorted_eps = sorted(file_endpoints, key=lambda e: e.line)
|
|
321
|
+
|
|
322
|
+
for i, endpoint in enumerate(sorted_eps):
|
|
323
|
+
start_line = endpoint.line
|
|
324
|
+
# End line is the start of the next endpoint (or end of file)
|
|
325
|
+
if i + 1 < len(sorted_eps):
|
|
326
|
+
end_line = sorted_eps[i + 1].line
|
|
327
|
+
else:
|
|
328
|
+
end_line = float("inf")
|
|
329
|
+
|
|
330
|
+
# Find DB operations within this endpoint's line range
|
|
331
|
+
tables = set()
|
|
332
|
+
for op in file_ops:
|
|
333
|
+
if start_line <= op.line < end_line:
|
|
334
|
+
tables.add(op.table_name)
|
|
335
|
+
|
|
336
|
+
if tables:
|
|
337
|
+
endpoint.db_tables = sorted(tables)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def scan_repos(
    repo_roots: list[str],
    extra_excludes: list[str] | None = None,
) -> list[RepoDocumentation]:
    """Scan multiple repositories and correlate cross-repo relationships."""
    docs = [scan_repo(root, extra_excludes) for root in repo_roots]

    # Correlation is only meaningful across two or more repositories.
    if len(docs) > 1:
        from commiter.correlator import correlate
        for rel in correlate(docs):
            # Attach each relationship to the repo it originates from
            # (first repo whose name matches).
            owner = next(
                (d for d in docs if d.repo_name == rel.source_repo), None,
            )
            if owner is not None:
                owner.service_relationships.append(rel)

    return docs
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def scan_repos_full(
    repo_roots: list[str],
    extra_excludes: list[str] | None = None,
) -> list[ScanResult]:
    """Scan multiple repositories with full index access for architecture output."""
    results = [_scan_repo_full(root, extra_excludes) for root in repo_roots]

    # Cross-repo correlation operates on the documentation objects only;
    # the index objects in each ScanResult are left as-is.
    docs = [result.doc for result in results]
    if len(docs) > 1:
        from commiter.correlator import correlate
        for rel in correlate(docs):
            owner = next(
                (d for d in docs if d.repo_name == rel.source_repo), None,
            )
            if owner is not None:
                owner.service_relationships.append(rel)

    return results
|
commiter/type_index.py
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
"""Cross-file type index: resolves type/interface definitions across files.
|
|
2
|
+
|
|
3
|
+
Mirrors the WrapperIndex pattern — indexes all type definitions in Pass 1,
|
|
4
|
+
then resolves type references from other files in Pass 2.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import TYPE_CHECKING
|
|
13
|
+
|
|
14
|
+
from commiter.parser import (
|
|
15
|
+
TypeField,
|
|
16
|
+
find_ts_interface_fields,
|
|
17
|
+
find_ts_enum_declarations,
|
|
18
|
+
find_ts_type_aliases,
|
|
19
|
+
find_ts_const_objects,
|
|
20
|
+
find_js_re_exports,
|
|
21
|
+
find_py_class_fields,
|
|
22
|
+
extract_type_parameters,
|
|
23
|
+
find_nodes_by_type,
|
|
24
|
+
node_text,
|
|
25
|
+
JSReExport,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from tree_sitter import Tree
|
|
30
|
+
from commiter.utils.tsconfig_resolver import TSConfigResolver
|
|
31
|
+
|
|
32
|
+
# Extensions to try when resolving import paths; the "/index.*" entries let
# a directory import (e.g. "./types") resolve to its barrel index file.
RESOLVE_EXTENSIONS = [".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.js", "/index.tsx"]

# Python class base classes that indicate a type definition worth indexing
# (Pydantic models, Marshmallow schemas, TypedDicts).
PY_TYPE_BASES = {"BaseModel", "Schema", "TypedDict"}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class TypeDefinition:
    """A type/interface/class/enum definition found in a source file."""
    # Declared type name, e.g. "CreateUserBody".
    name: str
    # Absolute path of the file containing the definition.
    file_path: str
    # 1-based line number of the declaration (0 when it could not be located).
    line: int
    # Fields/members extracted by the parser.
    fields: list[TypeField]
    # Source language, e.g. "typescript", "tsx", "javascript", "python".
    language: str
    kind: str = "interface"  # "interface" | "enum" | "type_alias" | "const"
    generic_params: list[str] = field(default_factory=list)  # ["T"], ["T", "U"]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class TypeIndex:
    """Index of type definitions across all files in a repository.

    Usage:
        idx = TypeIndex(alias_resolver=resolver)
        # Pass 1: index all files
        for path in files:
            idx.index_file(path, tree, source, language)
        # Pass 2: resolve types from other files
        typedef = idx.resolve("CreateUserBody", "../types/api", "/project/routes/users.ts")
    """

    def __init__(self, alias_resolver: "TSConfigResolver | None" = None) -> None:
        """Create an empty index.

        Args:
            alias_resolver: Optional tsconfig/jsconfig resolver used for
                non-relative import specifiers (e.g. "@/types").
        """
        # type_name -> list of definitions (same name can exist in multiple files)
        self._types: dict[str, list[TypeDefinition]] = {}
        # normalized absolute path (no extension) -> actual absolute path
        self._indexed_paths: dict[str, str] = {}
        self._alias_resolver = alias_resolver
        # barrel file re-export cache: abs_path -> list[JSReExport]
        self._re_exports: dict[str, list[JSReExport]] = {}

    def index_file(self, file_path: str, tree: "Tree", source: bytes, language: str) -> None:
        """Pass 1: extract all type definitions from a file."""
        abs_path = os.path.abspath(file_path)

        # Store extension-less path for import resolution, so "./types" and
        # "./types.ts" style imports both find this file.
        stem = _strip_extension(abs_path)
        self._indexed_paths[stem] = abs_path
        # An index file also answers for its parent directory
        # ("./types" -> types/index.ts).
        if Path(abs_path).stem == "index":
            self._indexed_paths[str(Path(abs_path).parent)] = abs_path

        if language in ("typescript", "tsx", "javascript"):
            # Cache re-exports for barrel file resolution
            re_exports = find_js_re_exports(tree.root_node, source)
            if re_exports:
                self._re_exports[abs_path] = re_exports
            self._index_ts_file(abs_path, tree, source, language)
        elif language == "python":
            self._index_py_file(abs_path, tree, source)

    def _index_ts_file(self, abs_path: str, tree: "Tree", source: bytes, language: str) -> None:
        """Index TypeScript/JavaScript interfaces, enums, type aliases, and const objects."""
        root = tree.root_node

        # Build a map of type name -> generic params from declarations
        generic_params_map = self._extract_all_generic_params(root, source)

        # Interfaces
        for name, fields in find_ts_interface_fields(root, source).items():
            line = self._find_type_line(root, source, name)
            self._types.setdefault(name, []).append(TypeDefinition(
                name=name, file_path=abs_path, line=line,
                fields=fields, language=language, kind="interface",
                generic_params=generic_params_map.get(name, []),
            ))

        # Enums
        for name, members in find_ts_enum_declarations(root, source).items():
            line = self._find_type_line(root, source, name)
            self._types.setdefault(name, []).append(TypeDefinition(
                name=name, file_path=abs_path, line=line,
                fields=members, language=language, kind="enum",
            ))

        # Type aliases
        for name, fields in find_ts_type_aliases(root, source).items():
            line = self._find_type_line(root, source, name)
            self._types.setdefault(name, []).append(TypeDefinition(
                name=name, file_path=abs_path, line=line,
                fields=fields, language=language, kind="type_alias",
                generic_params=generic_params_map.get(name, []),
            ))

        # Const objects (as const)
        for name, fields in find_ts_const_objects(root, source).items():
            line = self._find_type_line(root, source, name)
            self._types.setdefault(name, []).append(TypeDefinition(
                name=name, file_path=abs_path, line=line,
                fields=fields, language=language, kind="const",
            ))

    def _extract_all_generic_params(self, root, source: bytes) -> dict[str, list[str]]:
        """Extract generic parameters for all interfaces and type aliases in a file.

        Handles both plain declarations and those wrapped in an export
        statement. Only entries with at least one parameter are recorded.
        """
        result: dict[str, list[str]] = {}
        for child in root.children:
            target = child
            if child.type == "export_statement":
                # Unwrap "export interface Foo<T> ..." to the declaration node.
                for sub in child.children:
                    if sub.type in ("interface_declaration", "type_alias_declaration"):
                        target = sub
                        break
            if target.type in ("interface_declaration", "type_alias_declaration"):
                name_node = target.child_by_field_name("name")
                if name_node:
                    name = node_text(name_node, source)
                    params = extract_type_parameters(target, source)
                    if params:
                        result[name] = params
        return result

    def _index_py_file(self, abs_path: str, tree: "Tree", source: bytes) -> None:
        """Index Python class definitions (Pydantic, TypedDict, dataclasses)."""
        import re
        text = source.decode("utf-8", errors="replace")
        # A lightweight regex pre-filter finds candidate classes whose base
        # list mentions a known type base; field extraction then uses the tree.
        for match in re.finditer(r'class\s+(\w+)\s*\([^)]*(?:' + '|'.join(PY_TYPE_BASES) + r')', text):
            class_name = match.group(1)
            fields = find_py_class_fields(tree.root_node, source, class_name)
            if fields:
                line = text[:match.start()].count("\n") + 1
                self._types.setdefault(class_name, []).append(TypeDefinition(
                    name=class_name,
                    file_path=abs_path,
                    line=line,
                    fields=fields,
                    language="python",
                ))

    def resolve(self, type_name: str, import_path: str | None, caller_file: str) -> TypeDefinition | None:
        """Pass 2: resolve a type reference to its definition.

        Args:
            type_name: The type/interface name (e.g. "CreateUserBody").
            import_path: The import module path (e.g. "../types/api", "@/types"), or None.
            caller_file: Absolute path of the file that references the type.

        Returns:
            The TypeDefinition if found, None otherwise.
        """
        if type_name not in self._types:
            return None

        # If no import path, return first match (best effort)
        if import_path is None:
            return self._types[type_name][0] if self._types[type_name] else None

        resolved_path = self._resolve_import_path(import_path, caller_file)
        if resolved_path is None:
            # Fallback: return first match
            return self._types[type_name][0] if self._types[type_name] else None

        # Filter to definitions in the resolved file
        for typedef in self._types[type_name]:
            if typedef.file_path == resolved_path:
                return typedef

        # Barrel file fallback: check if resolved_path re-exports this type
        actual_path = self._resolve_through_barrel(type_name, resolved_path)
        if actual_path:
            for typedef in self._types[type_name]:
                if typedef.file_path == actual_path:
                    return typedef

        return None

    def resolve_local(self, type_name: str, file_path: str) -> TypeDefinition | None:
        """Look up a type defined in the same file."""
        if type_name not in self._types:
            return None
        abs_path = os.path.abspath(file_path)
        for typedef in self._types[type_name]:
            if typedef.file_path == abs_path:
                return typedef
        return None

    def _resolve_through_barrel(self, name: str, barrel_path: str) -> str | None:
        """Follow re-exports in a barrel file to find the actual source file."""
        re_exports = self._re_exports.get(barrel_path)
        if not re_exports:
            return None

        barrel_dir = str(Path(barrel_path).parent)

        for re_export in re_exports:
            # A wildcard re-export ("export * from ...") may carry any name.
            if name not in re_export.names and "*" not in re_export.names:
                continue

            target_base = os.path.normpath(os.path.join(barrel_dir, re_export.module_path))
            target_stem = _strip_extension(target_base)

            if target_stem in self._indexed_paths:
                return self._indexed_paths[target_stem]

            for ext in RESOLVE_EXTENSIONS:
                candidate_stem = _strip_extension(target_base + ext)
                if candidate_stem in self._indexed_paths:
                    return self._indexed_paths[candidate_stem]

        return None

    def _resolve_import_path(self, import_path: str, caller_file: str) -> str | None:
        """Resolve an import module path to an absolute, indexed file path.

        Handles three specifier styles:
        - JS/TS relative ("./x", "../x"): joined onto the caller's directory.
        - Python dotted relative (".schemas", "..shared.models"): one leading
          dot means the caller's package; each additional dot climbs a level.
        - Bare/aliased ("@/types"): delegated to the tsconfig alias resolver.

        Returns None when the path cannot be mapped to a known file.
        """
        caller_dir = str(Path(os.path.abspath(caller_file)).parent)

        if import_path.startswith(("./", "../")) or import_path in (".", ".."):
            # JS/TS-style relative import
            base = os.path.normpath(os.path.join(caller_dir, import_path))
        elif import_path.startswith("."):
            # Python relative imports: .schemas -> schemas.py in same dir.
            # (Previously dead code: the old JS branch captured every
            # dot-prefixed path first, so this logic never ran and Python
            # relative imports always failed to resolve.)
            dots = len(import_path) - len(import_path.lstrip("."))
            module = import_path[dots:].replace(".", "/")
            parent = Path(caller_dir)
            for _ in range(dots - 1):
                parent = parent.parent
            base = str(parent / module)
        elif self._alias_resolver:
            # Try tsconfig alias
            resolved = self._alias_resolver.resolve(import_path, caller_file)
            if resolved:
                return resolved
            return None
        else:
            return None

        # Try matching indexed paths
        stem = _strip_extension(base)
        if stem in self._indexed_paths:
            return self._indexed_paths[stem]

        # Try with extensions
        for ext in RESOLVE_EXTENSIONS:
            candidate = base + ext
            candidate_stem = _strip_extension(candidate)
            if candidate_stem in self._indexed_paths:
                return self._indexed_paths[candidate_stem]

        # Python: try .py extension on disk
        py_candidate = base + ".py"
        if os.path.isfile(py_candidate):
            return os.path.abspath(py_candidate)

        return None

    def _find_type_line(self, root_node, source: bytes, type_name: str) -> int:
        """Find the 1-based line number of a named type declaration.

        Searches interface, enum, and type-alias declaration nodes (the
        original implementation only checked interfaces, so enums and type
        aliases always reported line 0). Returns 0 when the declaration
        cannot be located (e.g. const objects).
        """
        declaration_types = (
            "interface_declaration",
            "enum_declaration",
            "type_alias_declaration",
        )
        for decl_type in declaration_types:
            for node in find_nodes_by_type(root_node, decl_type):
                name_node = node.child_by_field_name("name")
                if name_node and node_text(name_node, source) == type_name:
                    return node.start_point[0] + 1
        return 0

    @property
    def type_count(self) -> int:
        # Total number of definitions, counting duplicates of the same name.
        return sum(len(defs) for defs in self._types.values())
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _strip_extension(path: str) -> str:
|
|
300
|
+
"""Strip JS/TS/Python file extensions from a path."""
|
|
301
|
+
p = Path(path)
|
|
302
|
+
while p.suffix in (".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs", ".py"):
|
|
303
|
+
p = p.with_suffix("")
|
|
304
|
+
return str(p)
|