compos-cli 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compos/cli/__init__.py +0 -0
- compos/cli/analyzers/__init__.py +10 -0
- compos/cli/analyzers/base.py +66 -0
- compos/cli/analyzers/docker_compose.py +137 -0
- compos/cli/analyzers/python.py +604 -0
- compos/cli/analyzers/typescript.py +823 -0
- compos/cli/git.py +92 -0
- compos/cli/main.py +464 -0
- compos/cli/watcher.py +1 -0
- compos/core/__init__.py +119 -0
- compos/core/diff.py +131 -0
- compos/core/graph.py +648 -0
- compos/core/integrity.py +346 -0
- compos/core/merge.py +289 -0
- compos/core/merge_log.py +128 -0
- compos/core/versioning.py +43 -0
- compos/core/write_pipeline.py +574 -0
- compos/schema/__init__.py +57 -0
- compos/schema/models.py +440 -0
- compos/schema/validation.py +1 -0
- compos/schema/versioning.py +1 -0
- compos/storage/__init__.py +29 -0
- compos/storage/local.py +209 -0
- compos/storage/locking.py +74 -0
- compos/storage/merge_log.py +92 -0
- compos_cli-0.0.0.dist-info/METADATA +16 -0
- compos_cli-0.0.0.dist-info/RECORD +29 -0
- compos_cli-0.0.0.dist-info/WHEEL +4 -0
- compos_cli-0.0.0.dist-info/entry_points.txt +7 -0
|
@@ -0,0 +1,604 @@
|
|
|
1
|
+
"""Python-specific detection."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
import logging
|
|
7
|
+
from collections.abc import Iterator
|
|
8
|
+
from datetime import UTC, datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from compos.cli.analyzers.base import AnalysisResult
|
|
12
|
+
from compos.schema.models import (
|
|
13
|
+
Component,
|
|
14
|
+
ComponentType,
|
|
15
|
+
ObjectStatus,
|
|
16
|
+
Provenance,
|
|
17
|
+
ProvenanceSource,
|
|
18
|
+
Relationship,
|
|
19
|
+
RelationshipPattern,
|
|
20
|
+
RelationshipType,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
_SKIP_DIRS = frozenset(
|
|
26
|
+
{
|
|
27
|
+
".venv",
|
|
28
|
+
"venv",
|
|
29
|
+
"node_modules",
|
|
30
|
+
"__pycache__",
|
|
31
|
+
".git",
|
|
32
|
+
".compos",
|
|
33
|
+
"site-packages",
|
|
34
|
+
".tox",
|
|
35
|
+
".mypy_cache",
|
|
36
|
+
".worktrees",
|
|
37
|
+
"worktrees",
|
|
38
|
+
"tests",
|
|
39
|
+
"test",
|
|
40
|
+
}
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
_FRAMEWORK_CALLS: dict[str, str] = {
|
|
44
|
+
"FastAPI": "FastAPI application",
|
|
45
|
+
"Flask": "Flask application",
|
|
46
|
+
"Django": "Django application",
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
# call-name → responsibility label
|
|
50
|
+
_DB_CALLS: dict[str, str] = {
|
|
51
|
+
"create_engine": "SQLAlchemy database engine",
|
|
52
|
+
"create_async_engine": "SQLAlchemy async database engine",
|
|
53
|
+
"declarative_base": "SQLAlchemy declarative base",
|
|
54
|
+
"MongoClient": "MongoDB client",
|
|
55
|
+
"AsyncIOMotorClient": "MongoDB async client (Motor)",
|
|
56
|
+
"Redis": "Redis client",
|
|
57
|
+
"StrictRedis": "Redis client",
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
# Module-gated DB calls: only match when imported from specific modules
|
|
61
|
+
_DB_MODULE_CALLS: dict[str, set[str]] = {
|
|
62
|
+
"connect": {"psycopg2", "asyncpg"},
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
# call-name → (ComponentType, responsibility)
|
|
66
|
+
_QUEUE_CALLS: dict[str, tuple[ComponentType, str]] = {
    # A Celery app object indicates a task worker, not a bare queue.
    "Celery": (ComponentType.WORKER, "Celery task worker"),
    # kafka-python client classes.
    "KafkaProducer": (ComponentType.QUEUE, "Kafka producer"),
    "KafkaConsumer": (ComponentType.QUEUE, "Kafka consumer"),
    # pika exposes both blocking and select-loop RabbitMQ connection classes.
    "BlockingConnection": (ComponentType.QUEUE, "RabbitMQ connection (pika)"),
    "SelectConnection": (ComponentType.QUEUE, "RabbitMQ connection (pika)"),
}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Directories that must never be treated as the project's main package,
# on top of everything already skipped during file discovery.
_IGNORE_PACKAGE_DIRS = _SKIP_DIRS | frozenset(
    {"alembic", "bin", "docs", "migrations", "scripts", "test", "tests"}
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _is_package(path: Path) -> bool:
|
|
81
|
+
"""Return True if *path* is a directory containing ``__init__.py``."""
|
|
82
|
+
return path.is_dir() and (path / "__init__.py").is_file()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _first_package_child(directory: Path) -> Path | None:
    """Return the first qualifying child package of *directory*, or None.

    Children are visited in sorted (deterministic) order; hidden directories
    and anything listed in ``_IGNORE_PACKAGE_DIRS`` are skipped.
    """
    for child in sorted(directory.iterdir()):
        if child.name.startswith("."):
            continue
        if child.name in _IGNORE_PACKAGE_DIRS:
            continue
        if _is_package(child):
            return child
    return None


def _find_source_root(project_root: Path) -> Path | None:
    """Find the main Python package root under *project_root*.

    For src-layout projects (``src/<pkg>/``), returns the package dir under
    ``src/``. For flat-layout (``<pkg>/``), returns the first package dir at
    root. Returns ``None`` if no packages are found.
    """
    # src-layout takes precedence; fall back to flat-layout if src/ is
    # missing or contains no package.
    src_dir = project_root / "src"
    if src_dir.is_dir():
        candidate = _first_package_child(src_dir)
        if candidate is not None:
            return candidate
    return _first_package_child(project_root)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _extract_init_docstring(package_dir: Path) -> str | None:
|
|
116
|
+
"""Extract the first-line docstring from ``__init__.py``, or None."""
|
|
117
|
+
init_file = package_dir / "__init__.py"
|
|
118
|
+
if not init_file.is_file():
|
|
119
|
+
return None
|
|
120
|
+
try:
|
|
121
|
+
tree = ast.parse(init_file.read_text())
|
|
122
|
+
except SyntaxError:
|
|
123
|
+
return None
|
|
124
|
+
docstring = ast.get_docstring(tree)
|
|
125
|
+
if docstring:
|
|
126
|
+
return docstring.split("\n")[0]
|
|
127
|
+
return None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _detect_packages(source_root: Path, project_root: Path) -> list[Component]:
    """Detect direct child packages of *source_root* as LIBRARY components."""
    parent = source_root.name
    found: list[Component] = []
    for entry in sorted(source_root.iterdir()):
        name = entry.name
        if name.startswith(".") or name in _IGNORE_PACKAGE_DIRS:
            continue
        if not _is_package(entry):
            continue
        # Prefer the package's own __init__ docstring as its description;
        # fall back to a generic label when there is none.
        summary = _extract_init_docstring(entry)
        if not summary:
            summary = "Python package"
        found.append(
            Component(
                id=f"pypkg-{parent}.{name}",
                name=name,
                responsibility=summary,
                type=ComponentType.LIBRARY,
                detection_confidence=0.70,
                status=ObjectStatus.CANDIDATE,
                provenance=_make_provenance(),
                paths=(str(entry.relative_to(project_root)) + "/",),
            )
        )
    return found
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _make_provenance() -> Provenance:
    """Build a static-analysis provenance stamp for this analyzer run."""
    now = datetime.now(UTC)
    return Provenance(
        source=ProvenanceSource.STATIC_ANALYSIS,
        tool="cli-analyze",
        timestamp=now,
    )
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _file_to_module_id(file: Path, root: Path, prefix: str) -> str:
|
|
168
|
+
rel = file.relative_to(root).with_suffix("")
|
|
169
|
+
module = str(rel).replace("/", ".").replace("\\", ".")
|
|
170
|
+
return f"{prefix}-{module}"
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _display_name(file: Path, root: Path) -> str:
|
|
174
|
+
"""Human-readable name: parent/stem for disambiguation, stem if unique."""
|
|
175
|
+
rel = file.relative_to(root).with_suffix("")
|
|
176
|
+
parts = rel.parts
|
|
177
|
+
if len(parts) >= 2:
|
|
178
|
+
return f"{parts[-2]}/{parts[-1]}"
|
|
179
|
+
return parts[-1] if parts else file.stem
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _iter_python_files(root: Path) -> Iterator[Path]:
    """Yield all ``*.py`` files under *root*, skipping ignored directories."""
    for candidate in sorted(root.rglob("*.py")):
        # Any skip-listed name anywhere in the path excludes the file.
        if _SKIP_DIRS.isdisjoint(candidate.parts):
            yield candidate
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _get_call_name(node: ast.Call) -> str | None:
|
|
190
|
+
"""Extract the function/method name from a Call node."""
|
|
191
|
+
if isinstance(node.func, ast.Name):
|
|
192
|
+
return node.func.id
|
|
193
|
+
if isinstance(node.func, ast.Attribute):
|
|
194
|
+
return node.func.attr
|
|
195
|
+
return None
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _collect_imports(tree: ast.Module) -> dict[str, str]:
|
|
199
|
+
"""Map imported names to their source modules.
|
|
200
|
+
|
|
201
|
+
e.g. ``from psycopg2 import connect`` → {"connect": "psycopg2"}
|
|
202
|
+
"""
|
|
203
|
+
mapping: dict[str, str] = {}
|
|
204
|
+
for node in ast.walk(tree):
|
|
205
|
+
if isinstance(node, ast.ImportFrom) and node.module:
|
|
206
|
+
top_module = node.module.split(".")[0]
|
|
207
|
+
for alias in node.names:
|
|
208
|
+
mapping[alias.asname or alias.name] = top_module
|
|
209
|
+
return mapping
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _detect_framework_apps(
    file: Path,
    root: Path,
    tree: ast.Module,
) -> list[Component]:
    """Detect web-framework app construction — at most one component per file."""
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        call_name = _get_call_name(node)
        if call_name is None or call_name not in _FRAMEWORK_CALLS:
            continue
        # First framework constructor found wins; emit one SERVICE component.
        return [
            Component(
                id=_file_to_module_id(file, root, "pyapp"),
                name=_display_name(file, root),
                responsibility=_FRAMEWORK_CALLS[call_name],
                type=ComponentType.SERVICE,
                detection_confidence=0.85,
                status=ObjectStatus.CANDIDATE,
                provenance=_make_provenance(),
                paths=(str(file.relative_to(root)),),
            )
        ]
    return []
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _make_db_component(file: Path, root: Path, responsibility: str) -> Component:
    """Build a DATABASE candidate component for *file* with *responsibility*."""
    return Component(
        id=_file_to_module_id(file, root, "pydb"),
        name=_display_name(file, root),
        responsibility=responsibility,
        type=ComponentType.DATABASE,
        detection_confidence=0.80,
        status=ObjectStatus.CANDIDATE,
        provenance=_make_provenance(),
        paths=(str(file.relative_to(root)),),
    )


def _detect_database_clients(
    file: Path,
    root: Path,
    tree: ast.Module,
) -> list[Component]:
    """Detect database client instantiation — one component per file.

    Returns at most one DATABASE component: the first matching call wins.
    Names in ``_DB_CALLS`` match directly; names in ``_DB_MODULE_CALLS``
    match only when imported from one of the gated modules
    (e.g. ``from psycopg2 import connect``).
    """
    imports = _collect_imports(tree)
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        func_name = _get_call_name(node)
        if func_name is None:
            continue
        # Direct match (e.g. create_engine, MongoClient, Redis)
        if func_name in _DB_CALLS:
            return [_make_db_component(file, root, _DB_CALLS[func_name])]
        # Module-gated match (e.g. psycopg2.connect)
        if func_name in _DB_MODULE_CALLS:
            source_module = imports.get(func_name)
            if source_module and source_module in _DB_MODULE_CALLS[func_name]:
                return [
                    _make_db_component(
                        file, root, f"{source_module} database connection"
                    )
                ]
    return []
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _detect_queue_clients(
    file: Path,
    root: Path,
    tree: ast.Module,
) -> list[Component]:
    """Detect queue/worker client instantiation — one component per file."""
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        name = _get_call_name(node)
        if not name or name not in _QUEUE_CALLS:
            continue
        kind, label = _QUEUE_CALLS[name]
        # First match wins: emit a single component and stop scanning.
        return [
            Component(
                id=_file_to_module_id(file, root, "pyq"),
                name=_display_name(file, root),
                responsibility=label,
                type=kind,
                detection_confidence=0.75,
                status=ObjectStatus.CANDIDATE,
                provenance=_make_provenance(),
                paths=(str(file.relative_to(root)),),
            )
        ]
    return []
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _build_module_index(components: list[Component]) -> dict[str, str]:
|
|
313
|
+
"""Map module dotted paths to component IDs.
|
|
314
|
+
|
|
315
|
+
Registers both the filesystem-relative path and the importable path.
|
|
316
|
+
For src-layout projects (path starts with ``src.``), the importable
|
|
317
|
+
path strips the ``src.`` prefix so ``from compos.db.session import …``
|
|
318
|
+
matches ``src/compos/db/session.py``.
|
|
319
|
+
|
|
320
|
+
Only indexes non-package (file-level) components.
|
|
321
|
+
"""
|
|
322
|
+
index: dict[str, str] = {}
|
|
323
|
+
for comp in components:
|
|
324
|
+
if comp.id.startswith("pypkg-"):
|
|
325
|
+
continue # package components use _build_package_module_index
|
|
326
|
+
parts = comp.id.split("-", 1)
|
|
327
|
+
if len(parts) == 2:
|
|
328
|
+
module_path = parts[1]
|
|
329
|
+
index[module_path] = comp.id
|
|
330
|
+
# src-layout: also register the importable path
|
|
331
|
+
if module_path.startswith("src."):
|
|
332
|
+
importable = module_path[4:] # strip "src."
|
|
333
|
+
index[importable] = comp.id
|
|
334
|
+
return index
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _build_package_module_index(
|
|
338
|
+
package_components: list[Component],
|
|
339
|
+
source_root: Path,
|
|
340
|
+
project_root: Path,
|
|
341
|
+
) -> dict[str, str]:
|
|
342
|
+
"""Map dotted import paths to package-level component IDs.
|
|
343
|
+
|
|
344
|
+
For each package component ``pypkg-{parent}.{child}``, registers:
|
|
345
|
+
- The filesystem-relative path: ``src.{parent}.{child}`` (if src-layout)
|
|
346
|
+
- The importable path: ``{parent}.{child}``
|
|
347
|
+
|
|
348
|
+
These entries match both exact imports and sub-module imports via
|
|
349
|
+
prefix matching in ``_check_import``.
|
|
350
|
+
"""
|
|
351
|
+
index: dict[str, str] = {}
|
|
352
|
+
parent_name = source_root.name
|
|
353
|
+
# Determine if this is src-layout
|
|
354
|
+
is_src_layout = source_root.parent.name == "src"
|
|
355
|
+
|
|
356
|
+
for comp in package_components:
|
|
357
|
+
# id format: pypkg-{parent}.{child}
|
|
358
|
+
child_name = comp.id.split(".")[-1]
|
|
359
|
+
# Register importable path: {parent}.{child}
|
|
360
|
+
importable = f"{parent_name}.{child_name}"
|
|
361
|
+
index[importable] = comp.id
|
|
362
|
+
# Register filesystem-relative path (if src-layout)
|
|
363
|
+
if is_src_layout:
|
|
364
|
+
fs_path = f"src.{parent_name}.{child_name}"
|
|
365
|
+
index[fs_path] = comp.id
|
|
366
|
+
return index
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def _check_import(
    module_name: str,
    source_component_id: str,
    module_index: dict[str, str],
    seen_targets: set[str],
    rels: list[Relationship],
    root: Path,
    file: Path,
) -> None:
    """Resolve *module_name* against the index and record a DEPENDENCY.

    An index entry matches when it equals *module_name* or is a dotted prefix
    of it.  When several entries match, the longest (most specific) module
    path wins — the previous first-match behavior depended on dict insertion
    order, so a short path like ``app`` could shadow the more specific
    ``app.db`` component.  Self-references are skipped; *seen_targets* and
    *rels* are mutated in place.  *root* and *file* are kept for caller
    signature compatibility and are not used here.
    """
    best_path: str | None = None
    best_target: str | None = None
    for mod_path, target_id in module_index.items():
        if target_id == source_component_id:
            continue  # skip self-references
        if module_name == mod_path or module_name.startswith(mod_path + "."):
            # Prefer the most specific (longest) matching module path.
            if best_path is None or len(mod_path) > len(best_path):
                best_path = mod_path
                best_target = target_id
    if best_target is None or best_target in seen_targets:
        return
    seen_targets.add(best_target)
    rels.append(
        Relationship(
            id=f"rel-{source_component_id}-{best_target}",
            source=source_component_id,
            target=best_target,
            type=RelationshipType.DEPENDENCY,
            pattern=RelationshipPattern.SYNCHRONOUS,
            description="Python import dependency",
            detection_confidence=0.70,
            status=ObjectStatus.CANDIDATE,
            provenance=_make_provenance(),
        )
    )
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _detect_import_relationships(
    file: Path,
    root: Path,
    tree: ast.Module,
    source_component_id: str,
    module_index: dict[str, str],
    seen_targets: set[str] | None = None,
) -> list[Relationship]:
    """Walk AST for imports and emit DEPENDENCY relationships to known components."""
    found: list[Relationship] = []
    targets = set() if seen_targets is None else seen_targets

    def resolve(name: str) -> None:
        # Delegates matching + relationship construction; mutates found/targets.
        _check_import(
            name,
            source_component_id,
            module_index,
            targets,
            found,
            root,
            file,
        )

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                if alias.name:
                    resolve(alias.name)
        elif isinstance(node, ast.ImportFrom) and node.module:
            resolve(node.module)
    return found
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def _is_subpath(path: Path, directory: Path) -> bool:
|
|
444
|
+
"""Check if *path* is inside *directory* (non-strict)."""
|
|
445
|
+
try:
|
|
446
|
+
path.relative_to(directory)
|
|
447
|
+
return True
|
|
448
|
+
except ValueError:
|
|
449
|
+
return False
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
class PythonAnalyzer:
    """Static analyzer deriving components and relationships from Python sources."""

    name = "python"

    def can_analyze(self, project_root: Path) -> bool:
        """Return True if *project_root* contains at least one scannable .py file."""
        return any(True for _ in _iter_python_files(project_root))

    def analyze(self, project_root: Path) -> AnalysisResult:
        """Analyze *project_root* and return detected components/relationships.

        Runs three passes:
          0. package detection (direct children of the source root),
          1. file-level framework/database/queue detection, merged into the
             owning package component where one exists,
          2. import scanning to emit DEPENDENCY relationships.
        """
        components: list[Component] = []
        warnings: list[str] = []
        # Track which file produced which component(s) and its parsed AST
        file_components: dict[Path, list[Component]] = {}
        parsed_trees: dict[Path, ast.Module] = {}

        # Pass 0: detect package-level components
        source_root = _find_source_root(project_root)
        # Map package directory -> package component for merge lookups
        pkg_dir_to_comp: dict[Path, Component] = {}
        if source_root is not None:
            pkg_components = _detect_packages(source_root, project_root)
            for pkg_comp in pkg_components:
                # Derive the package directory from the component id
                # id format: pypkg-{parent}.{child} -> source_root / child
                child_name = pkg_comp.id.split(".")[-1]
                pkg_dir = source_root / child_name
                pkg_dir_to_comp[pkg_dir] = pkg_comp
            components.extend(pkg_components)

        # Pass 1: detect file-level components, merging into packages
        for py_file in _iter_python_files(project_root):
            try:
                # Decode explicitly as UTF-8 (the default source encoding).
                # Previously only SyntaxError was caught, so an unreadable
                # (OSError), undecodable (UnicodeDecodeError), or
                # null-byte-containing (ValueError from ast.parse) file
                # aborted the entire analysis run.
                tree = ast.parse(py_file.read_text(encoding="utf-8"))
            except (OSError, SyntaxError, UnicodeDecodeError, ValueError):
                warnings.append(f"Skipped unparseable file: {py_file}")
                continue
            parsed_trees[py_file] = tree
            file_comps: list[Component] = []
            file_comps.extend(_detect_framework_apps(py_file, project_root, tree))
            file_comps.extend(_detect_database_clients(py_file, project_root, tree))
            file_comps.extend(_detect_queue_clients(py_file, project_root, tree))
            if file_comps:
                # Check if this file belongs to a detected package
                owning_pkg_dir: Path | None = None
                for pkg_dir in pkg_dir_to_comp:
                    if _is_subpath(py_file, pkg_dir):
                        owning_pkg_dir = pkg_dir
                        break

                if owning_pkg_dir is not None:
                    # Merge: upgrade the package component instead of
                    # creating standalone file-level components.
                    # Pick the highest-confidence file detection to upgrade with.
                    best = max(file_comps, key=lambda c: c.detection_confidence)
                    existing_pkg = pkg_dir_to_comp[owning_pkg_dir]
                    # Only upgrade if the file detection has higher confidence
                    # or a more specific type than LIBRARY
                    if (
                        existing_pkg.type == ComponentType.LIBRARY
                        or best.detection_confidence > existing_pkg.detection_confidence
                    ):
                        # Preserve the docstring-based responsibility if it
                        # was extracted from __init__.py (not the generic
                        # fallback). Only use the detection label when the
                        # package had no docstring.
                        responsibility = (
                            existing_pkg.responsibility
                            if existing_pkg.responsibility != "Python package"
                            else best.responsibility
                        )
                        upgraded = Component(
                            id=existing_pkg.id,
                            name=existing_pkg.name,
                            responsibility=responsibility,
                            type=best.type,
                            detection_confidence=best.detection_confidence,
                            status=existing_pkg.status,
                            provenance=existing_pkg.provenance,
                            paths=existing_pkg.paths,
                        )
                        # Replace in components list and tracking dict
                        components = [
                            upgraded if c.id == existing_pkg.id else c
                            for c in components
                        ]
                        pkg_dir_to_comp[owning_pkg_dir] = upgraded
                    # Don't add file-level components — they're merged
                else:
                    file_components[py_file] = file_comps
                    components.extend(file_comps)

        # Pass 2: detect import relationships between components.
        #
        # Two kinds of components need scanning:
        #   (a) Package components — scan ALL parsed files inside their directory
        #   (b) Standalone file components — scan their subtree (old logic)
        #
        # Build a combined module index from both package and file components.
        pkg_components_list = [c for c in components if c.id.startswith("pypkg-")]
        pkg_module_index: dict[str, str] = {}
        if source_root is not None and pkg_components_list:
            pkg_module_index = _build_package_module_index(
                pkg_components_list, source_root, project_root
            )
        file_module_index = _build_module_index(components)
        combined_index = {**file_module_index, **pkg_module_index}

        relationships: list[Relationship] = []

        # (a) Package components: scan all parsed files in their directory subtree
        for pkg_dir, pkg_comp in pkg_dir_to_comp.items():
            seen: set[str] = set()
            for py_file, tree in parsed_trees.items():
                if _is_subpath(py_file, pkg_dir):
                    relationships.extend(
                        _detect_import_relationships(
                            py_file,
                            project_root,
                            tree,
                            pkg_comp.id,
                            combined_index,
                            seen_targets=seen,
                        )
                    )

        # (b) Standalone file components (outside packages): old subtree logic
        all_component_files = set(file_components.keys())
        for py_file, file_comps in file_components.items():
            comp_dir = py_file.parent
            other_component_files = all_component_files - {py_file}
            # Scan the component's own file plus files in its subtree that do
            # not themselves belong to a different component.
            package_files = [
                f
                for f in parsed_trees
                if (f == py_file or _is_subpath(f, comp_dir))
                and f not in other_component_files
            ]
            for comp in file_comps:
                seen_standalone: set[str] = set()
                for pkg_file in package_files:
                    relationships.extend(
                        _detect_import_relationships(
                            pkg_file,
                            project_root,
                            parsed_trees[pkg_file],
                            comp.id,
                            combined_index,
                            seen_targets=seen_standalone,
                        )
                    )

        return AnalysisResult(
            components=tuple(components),
            relationships=tuple(relationships),
            warnings=tuple(warnings),
        )
|