compos-cli 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,604 @@
1
+ """Python-specific detection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ import logging
7
+ from collections.abc import Iterator
8
+ from datetime import UTC, datetime
9
+ from pathlib import Path
10
+
11
+ from compos.cli.analyzers.base import AnalysisResult
12
+ from compos.schema.models import (
13
+ Component,
14
+ ComponentType,
15
+ ObjectStatus,
16
+ Provenance,
17
+ ProvenanceSource,
18
+ Relationship,
19
+ RelationshipPattern,
20
+ RelationshipType,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
# Directory names that are always skipped when walking the project tree:
# virtualenvs, VCS metadata, caches, vendored dependencies, and test trees.
_SKIP_DIRS = frozenset(
    {
        ".venv",
        "venv",
        "node_modules",
        "__pycache__",
        ".git",
        ".compos",
        "site-packages",
        ".tox",
        ".mypy_cache",
        ".worktrees",
        "worktrees",
        "tests",
        "test",
    }
)

# Constructor call-name → responsibility label for web-framework app objects.
_FRAMEWORK_CALLS: dict[str, str] = {
    "FastAPI": "FastAPI application",
    "Flask": "Flask application",
    "Django": "Django application",
}

# call-name → responsibility label
_DB_CALLS: dict[str, str] = {
    "create_engine": "SQLAlchemy database engine",
    "create_async_engine": "SQLAlchemy async database engine",
    "declarative_base": "SQLAlchemy declarative base",
    "MongoClient": "MongoDB client",
    "AsyncIOMotorClient": "MongoDB async client (Motor)",
    "Redis": "Redis client",
    "StrictRedis": "Redis client",
}

# Module-gated DB calls: only match when imported from specific modules
# (a bare ``connect`` is too generic to match unconditionally).
_DB_MODULE_CALLS: dict[str, set[str]] = {
    "connect": {"psycopg2", "asyncpg"},
}

# call-name → (ComponentType, responsibility)
_QUEUE_CALLS: dict[str, tuple[ComponentType, str]] = {
    "Celery": (ComponentType.WORKER, "Celery task worker"),
    "KafkaProducer": (ComponentType.QUEUE, "Kafka producer"),
    "KafkaConsumer": (ComponentType.QUEUE, "Kafka consumer"),
    "BlockingConnection": (ComponentType.QUEUE, "RabbitMQ connection (pika)"),
    "SelectConnection": (ComponentType.QUEUE, "RabbitMQ connection (pika)"),
}


# Directories never considered when locating Python packages.
# NOTE(review): "tests"/"test" are already members of _SKIP_DIRS, so
# repeating them here is redundant but harmless — confirm before removing.
_IGNORE_PACKAGE_DIRS = _SKIP_DIRS | frozenset(
    {"tests", "test", "docs", "scripts", "bin", "migrations", "alembic"}
)
78
+
79
+
80
+ def _is_package(path: Path) -> bool:
81
+ """Return True if *path* is a directory containing ``__init__.py``."""
82
+ return path.is_dir() and (path / "__init__.py").is_file()
83
+
84
+
85
def _find_source_root(project_root: Path) -> Path | None:
    """Find the main Python package root under *project_root*.

    src-layout projects (``src/<pkg>/``) resolve to the package directory
    under ``src/``; flat-layout projects resolve to the first package
    directory at the root. Returns ``None`` when nothing qualifies.
    """

    def first_package(directory: Path) -> Path | None:
        # First qualifying package child, in sorted (deterministic) order.
        for entry in sorted(directory.iterdir()):
            if entry.name.startswith(".") or entry.name in _IGNORE_PACKAGE_DIRS:
                continue
            if _is_package(entry):
                return entry
        return None

    # Prefer src-layout when a src/ directory exists.
    src_dir = project_root / "src"
    if src_dir.is_dir():
        candidate = first_package(src_dir)
        if candidate is not None:
            return candidate

    # Fall back to flat-layout at the project root.
    return first_package(project_root)
113
+
114
+
115
+ def _extract_init_docstring(package_dir: Path) -> str | None:
116
+ """Extract the first-line docstring from ``__init__.py``, or None."""
117
+ init_file = package_dir / "__init__.py"
118
+ if not init_file.is_file():
119
+ return None
120
+ try:
121
+ tree = ast.parse(init_file.read_text())
122
+ except SyntaxError:
123
+ return None
124
+ docstring = ast.get_docstring(tree)
125
+ if docstring:
126
+ return docstring.split("\n")[0]
127
+ return None
128
+
129
+
130
def _detect_packages(source_root: Path, project_root: Path) -> list[Component]:
    """Detect direct child packages of *source_root* as LIBRARY components."""
    parent = source_root.name
    found: list[Component] = []
    for entry in sorted(source_root.iterdir()):
        not_a_candidate = (
            entry.name.startswith(".")
            or entry.name in _IGNORE_PACKAGE_DIRS
            or not _is_package(entry)
        )
        if not_a_candidate:
            continue
        # Prefer the package's own docstring as its responsibility.
        summary = _extract_init_docstring(entry) or "Python package"
        found.append(
            Component(
                id=f"pypkg-{parent}.{entry.name}",
                name=entry.name,
                responsibility=summary,
                type=ComponentType.LIBRARY,
                detection_confidence=0.70,
                status=ObjectStatus.CANDIDATE,
                provenance=_make_provenance(),
                paths=(str(entry.relative_to(project_root)) + "/",),
            )
        )
    return found
157
+
158
+
159
def _make_provenance() -> Provenance:
    """Build the static-analysis provenance stamp used by every detection."""
    now = datetime.now(UTC)
    return Provenance(
        source=ProvenanceSource.STATIC_ANALYSIS,
        tool="cli-analyze",
        timestamp=now,
    )
165
+
166
+
167
+ def _file_to_module_id(file: Path, root: Path, prefix: str) -> str:
168
+ rel = file.relative_to(root).with_suffix("")
169
+ module = str(rel).replace("/", ".").replace("\\", ".")
170
+ return f"{prefix}-{module}"
171
+
172
+
173
+ def _display_name(file: Path, root: Path) -> str:
174
+ """Human-readable name: parent/stem for disambiguation, stem if unique."""
175
+ rel = file.relative_to(root).with_suffix("")
176
+ parts = rel.parts
177
+ if len(parts) >= 2:
178
+ return f"{parts[-2]}/{parts[-1]}"
179
+ return parts[-1] if parts else file.stem
180
+
181
+
182
def _iter_python_files(root: Path) -> Iterator[Path]:
    """Yield ``*.py`` files under *root*, skipping anything inside _SKIP_DIRS."""
    for candidate in sorted(root.rglob("*.py")):
        # Keep only files whose path shares no segment with the skip list.
        if _SKIP_DIRS.isdisjoint(candidate.parts):
            yield candidate
187
+
188
+
189
+ def _get_call_name(node: ast.Call) -> str | None:
190
+ """Extract the function/method name from a Call node."""
191
+ if isinstance(node.func, ast.Name):
192
+ return node.func.id
193
+ if isinstance(node.func, ast.Attribute):
194
+ return node.func.attr
195
+ return None
196
+
197
+
198
+ def _collect_imports(tree: ast.Module) -> dict[str, str]:
199
+ """Map imported names to their source modules.
200
+
201
+ e.g. ``from psycopg2 import connect`` → {"connect": "psycopg2"}
202
+ """
203
+ mapping: dict[str, str] = {}
204
+ for node in ast.walk(tree):
205
+ if isinstance(node, ast.ImportFrom) and node.module:
206
+ top_module = node.module.split(".")[0]
207
+ for alias in node.names:
208
+ mapping[alias.asname or alias.name] = top_module
209
+ return mapping
210
+
211
+
212
def _detect_framework_apps(
    file: Path,
    root: Path,
    tree: ast.Module,
) -> list[Component]:
    """Detect web-framework app construction — at most one component per file."""
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        called = _get_call_name(node)
        if not called or called not in _FRAMEWORK_CALLS:
            continue
        # First framework constructor wins; one component per file.
        return [
            Component(
                id=_file_to_module_id(file, root, "pyapp"),
                name=_display_name(file, root),
                responsibility=_FRAMEWORK_CALLS[called],
                type=ComponentType.SERVICE,
                detection_confidence=0.85,
                status=ObjectStatus.CANDIDATE,
                provenance=_make_provenance(),
                paths=(str(file.relative_to(root)),),
            )
        ]
    return []
237
+
238
+
239
def _detect_database_clients(
    file: Path,
    root: Path,
    tree: ast.Module,
) -> list[Component]:
    """Detect database client instantiation — one component per file."""

    def _component(responsibility: str) -> list[Component]:
        # Shared constructor for both match paths below.
        return [
            Component(
                id=_file_to_module_id(file, root, "pydb"),
                name=_display_name(file, root),
                responsibility=responsibility,
                type=ComponentType.DATABASE,
                detection_confidence=0.80,
                status=ObjectStatus.CANDIDATE,
                provenance=_make_provenance(),
                paths=(str(file.relative_to(root)),),
            )
        ]

    imports = _collect_imports(tree)
    calls = (n for n in ast.walk(tree) if isinstance(n, ast.Call))
    for call in calls:
        called = _get_call_name(call)
        if called is None:
            continue
        # Direct match (e.g. create_engine, MongoClient, Redis)
        if called in _DB_CALLS:
            return _component(_DB_CALLS[called])
        # Module-gated match (e.g. psycopg2.connect)
        allowed_modules = _DB_MODULE_CALLS.get(called)
        if allowed_modules:
            origin = imports.get(called)
            if origin is not None and origin in allowed_modules:
                return _component(f"{origin} database connection")
    return []
283
+
284
+
285
def _detect_queue_clients(
    file: Path,
    root: Path,
    tree: ast.Module,
) -> list[Component]:
    """Detect queue/worker client instantiation — one component per file."""
    matched: tuple[ComponentType, str] | None = None
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        called = _get_call_name(node)
        if called and called in _QUEUE_CALLS:
            matched = _QUEUE_CALLS[called]
            break
    if matched is None:
        return []
    comp_type, responsibility = matched
    return [
        Component(
            id=_file_to_module_id(file, root, "pyq"),
            name=_display_name(file, root),
            responsibility=responsibility,
            type=comp_type,
            detection_confidence=0.75,
            status=ObjectStatus.CANDIDATE,
            provenance=_make_provenance(),
            paths=(str(file.relative_to(root)),),
        )
    ]
310
+
311
+
312
+ def _build_module_index(components: list[Component]) -> dict[str, str]:
313
+ """Map module dotted paths to component IDs.
314
+
315
+ Registers both the filesystem-relative path and the importable path.
316
+ For src-layout projects (path starts with ``src.``), the importable
317
+ path strips the ``src.`` prefix so ``from compos.db.session import …``
318
+ matches ``src/compos/db/session.py``.
319
+
320
+ Only indexes non-package (file-level) components.
321
+ """
322
+ index: dict[str, str] = {}
323
+ for comp in components:
324
+ if comp.id.startswith("pypkg-"):
325
+ continue # package components use _build_package_module_index
326
+ parts = comp.id.split("-", 1)
327
+ if len(parts) == 2:
328
+ module_path = parts[1]
329
+ index[module_path] = comp.id
330
+ # src-layout: also register the importable path
331
+ if module_path.startswith("src."):
332
+ importable = module_path[4:] # strip "src."
333
+ index[importable] = comp.id
334
+ return index
335
+
336
+
337
+ def _build_package_module_index(
338
+ package_components: list[Component],
339
+ source_root: Path,
340
+ project_root: Path,
341
+ ) -> dict[str, str]:
342
+ """Map dotted import paths to package-level component IDs.
343
+
344
+ For each package component ``pypkg-{parent}.{child}``, registers:
345
+ - The filesystem-relative path: ``src.{parent}.{child}`` (if src-layout)
346
+ - The importable path: ``{parent}.{child}``
347
+
348
+ These entries match both exact imports and sub-module imports via
349
+ prefix matching in ``_check_import``.
350
+ """
351
+ index: dict[str, str] = {}
352
+ parent_name = source_root.name
353
+ # Determine if this is src-layout
354
+ is_src_layout = source_root.parent.name == "src"
355
+
356
+ for comp in package_components:
357
+ # id format: pypkg-{parent}.{child}
358
+ child_name = comp.id.split(".")[-1]
359
+ # Register importable path: {parent}.{child}
360
+ importable = f"{parent_name}.{child_name}"
361
+ index[importable] = comp.id
362
+ # Register filesystem-relative path (if src-layout)
363
+ if is_src_layout:
364
+ fs_path = f"src.{parent_name}.{child_name}"
365
+ index[fs_path] = comp.id
366
+ return index
367
+
368
+
369
def _check_import(
    module_name: str,
    source_component_id: str,
    module_index: dict[str, str],
    seen_targets: set[str],
    rels: list[Relationship],
    root: Path,
    file: Path,
) -> None:
    """Resolve *module_name* against *module_index* and record a relationship.

    The first index entry that matches (exactly or as a dotted prefix)
    wins; a relationship is appended only the first time each target is
    seen. Mutates *seen_targets* and *rels* in place.

    NOTE(review): *root* and *file* are currently unused — kept for
    signature compatibility with callers.
    """
    for mod_path, target_id in module_index.items():
        if target_id == source_component_id:
            # Never relate a component to itself.
            continue
        exact = module_name == mod_path
        submodule = module_name.startswith(mod_path + ".")
        if not (exact or submodule):
            continue
        if target_id not in seen_targets:
            seen_targets.add(target_id)
            rels.append(
                Relationship(
                    id=f"rel-{source_component_id}-{target_id}",
                    source=source_component_id,
                    target=target_id,
                    type=RelationshipType.DEPENDENCY,
                    pattern=RelationshipPattern.SYNCHRONOUS,
                    description="Python import dependency",
                    detection_confidence=0.70,
                    status=ObjectStatus.CANDIDATE,
                    provenance=_make_provenance(),
                )
            )
        break  # first matching entry wins
399
+
400
+
401
def _detect_import_relationships(
    file: Path,
    root: Path,
    tree: ast.Module,
    source_component_id: str,
    module_index: dict[str, str],
    seen_targets: set[str] | None = None,
) -> list[Relationship]:
    """Walk the AST for imports and emit DEPENDENCY relationships.

    When *seen_targets* is supplied, it is shared (and mutated) across
    calls so a target is only related once per source component.
    """
    rels: list[Relationship] = []
    seen = set() if seen_targets is None else seen_targets
    for node in ast.walk(tree):
        # Collect every module name this statement imports.
        if isinstance(node, ast.Import):
            names = [alias.name for alias in node.names if alias.name]
        elif isinstance(node, ast.ImportFrom) and node.module:
            names = [node.module]
        else:
            continue
        for module_name in names:
            _check_import(
                module_name,
                source_component_id,
                module_index,
                seen,
                rels,
                root,
                file,
            )
    return rels
441
+
442
+
443
+ def _is_subpath(path: Path, directory: Path) -> bool:
444
+ """Check if *path* is inside *directory* (non-strict)."""
445
+ try:
446
+ path.relative_to(directory)
447
+ return True
448
+ except ValueError:
449
+ return False
450
+
451
+
452
class PythonAnalyzer:
    """Static analyzer producing Python components and import relationships.

    Provides the ``can_analyze`` / ``analyze`` pair consumed by the CLI
    analyzer pipeline (see ``compos.cli.analyzers.base.AnalysisResult``).
    """

    # Analyzer identifier reported alongside results.
    name = "python"

    def can_analyze(self, project_root: Path) -> bool:
        """Return True if *project_root* has at least one non-skipped .py file."""
        return any(True for _ in _iter_python_files(project_root))

    def analyze(self, project_root: Path) -> AnalysisResult:
        """Run the three detection passes and assemble an AnalysisResult.

        Pass 0 detects package-level LIBRARY components; pass 1 detects
        file-level framework/database/queue components, merging them into
        an owning package when one exists; pass 2 derives import-based
        DEPENDENCY relationships between everything detected.
        """
        components: list[Component] = []
        warnings: list[str] = []
        # Track which file produced which component(s) and its parsed AST
        file_components: dict[Path, list[Component]] = {}
        parsed_trees: dict[Path, ast.Module] = {}

        # Pass 0: detect package-level components
        source_root = _find_source_root(project_root)
        # Map package directory -> package component for merge lookups
        pkg_dir_to_comp: dict[Path, Component] = {}
        if source_root is not None:
            pkg_components = _detect_packages(source_root, project_root)
            for pkg_comp in pkg_components:
                # Derive the package directory from the component id
                # id format: pypkg-{parent}.{child} -> source_root / child
                child_name = pkg_comp.id.split(".")[-1]
                pkg_dir = source_root / child_name
                pkg_dir_to_comp[pkg_dir] = pkg_comp
            components.extend(pkg_components)

        # Pass 1: detect file-level components, merging into packages
        for py_file in _iter_python_files(project_root):
            try:
                # NOTE(review): read_text() with default encoding may raise
                # UnicodeDecodeError on non-UTF-8 files, which is not caught
                # here — confirm whether that should be a warning too.
                tree = ast.parse(py_file.read_text())
            except SyntaxError:
                warnings.append(f"Skipped unparseable file: {py_file}")
                continue
            parsed_trees[py_file] = tree
            file_comps: list[Component] = []
            file_comps.extend(_detect_framework_apps(py_file, project_root, tree))
            file_comps.extend(_detect_database_clients(py_file, project_root, tree))
            file_comps.extend(_detect_queue_clients(py_file, project_root, tree))
            if file_comps:
                # Check if this file belongs to a detected package
                owning_pkg_dir: Path | None = None
                for pkg_dir in pkg_dir_to_comp:
                    if _is_subpath(py_file, pkg_dir):
                        owning_pkg_dir = pkg_dir
                        break

                if owning_pkg_dir is not None:
                    # Merge: upgrade the package component instead of
                    # creating standalone file-level components.
                    # Pick the highest-confidence file detection to upgrade with.
                    best = max(file_comps, key=lambda c: c.detection_confidence)
                    existing_pkg = pkg_dir_to_comp[owning_pkg_dir]
                    # Only upgrade if the file detection has higher confidence
                    # or a more specific type than LIBRARY
                    if (
                        existing_pkg.type == ComponentType.LIBRARY
                        or best.detection_confidence > existing_pkg.detection_confidence
                    ):
                        # Preserve the docstring-based responsibility if it
                        # was extracted from __init__.py (not the generic
                        # fallback). Only use the detection label when the
                        # package had no docstring.
                        responsibility = (
                            existing_pkg.responsibility
                            if existing_pkg.responsibility != "Python package"
                            else best.responsibility
                        )
                        # Components are immutable here, so build a replacement
                        # carrying the package identity with the file-level type.
                        upgraded = Component(
                            id=existing_pkg.id,
                            name=existing_pkg.name,
                            responsibility=responsibility,
                            type=best.type,
                            detection_confidence=best.detection_confidence,
                            status=existing_pkg.status,
                            provenance=existing_pkg.provenance,
                            paths=existing_pkg.paths,
                        )
                        # Replace in components list and tracking dict
                        components = [
                            upgraded if c.id == existing_pkg.id else c
                            for c in components
                        ]
                        pkg_dir_to_comp[owning_pkg_dir] = upgraded
                    # Don't add file-level components — they're merged
                else:
                    file_components[py_file] = file_comps
                    components.extend(file_comps)

        # Pass 2: detect import relationships between components.
        #
        # Two kinds of components need scanning:
        # (a) Package components — scan ALL parsed files inside their directory
        # (b) Standalone file components — scan their subtree (old logic)
        #
        # Build a combined module index from both package and file components.
        pkg_components_list = [c for c in components if c.id.startswith("pypkg-")]
        pkg_module_index: dict[str, str] = {}
        if source_root is not None and pkg_components_list:
            pkg_module_index = _build_package_module_index(
                pkg_components_list, source_root, project_root
            )
        file_module_index = _build_module_index(components)
        # Package entries take precedence over file entries on key collision.
        combined_index = {**file_module_index, **pkg_module_index}

        relationships: list[Relationship] = []

        # (a) Package components: scan all parsed files in their directory subtree
        for pkg_dir, pkg_comp in pkg_dir_to_comp.items():
            # One seen-set per source package: dedupe targets across its files.
            seen: set[str] = set()
            for py_file, tree in parsed_trees.items():
                if _is_subpath(py_file, pkg_dir):
                    relationships.extend(
                        _detect_import_relationships(
                            py_file,
                            project_root,
                            tree,
                            pkg_comp.id,
                            combined_index,
                            seen_targets=seen,
                        )
                    )

        # (b) Standalone file components (outside packages): old subtree logic
        all_component_files = set(file_components.keys())
        for py_file, file_comps in file_components.items():
            comp_dir = py_file.parent
            # Exclude files owned by OTHER standalone components so their
            # imports are not attributed to this one.
            other_component_files = all_component_files - {py_file}
            package_files = [
                f
                for f in parsed_trees
                if (f == py_file or _is_subpath(f, comp_dir))
                and f not in other_component_files
            ]
            for comp in file_comps:
                seen_standalone: set[str] = set()
                for pkg_file in package_files:
                    relationships.extend(
                        _detect_import_relationships(
                            pkg_file,
                            project_root,
                            parsed_trees[pkg_file],
                            comp.id,
                            combined_index,
                            seen_targets=seen_standalone,
                        )
                    )

        return AnalysisResult(
            components=tuple(components),
            relationships=tuple(relationships),
            warnings=tuple(warnings),
        )