PyPI - codedebrief - Versions diffs - 0.11.0__py3-none-any.whl - Mend

codedebrief 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

codedebrief/__init__.py +12 -0
codedebrief/analysis/__init__.py +16 -0
codedebrief/analysis/common.py +527 -0
codedebrief/analysis/discovery.py +100 -0
codedebrief/analysis/languages/__init__.py +6 -0
codedebrief/analysis/languages/_common.py +68 -0
codedebrief/analysis/languages/c.py +96 -0
codedebrief/analysis/languages/cpp.py +146 -0
codedebrief/analysis/languages/csharp.py +137 -0
codedebrief/analysis/languages/go.py +157 -0
codedebrief/analysis/languages/java.py +158 -0
codedebrief/analysis/languages/php.py +83 -0
codedebrief/analysis/languages/ruby.py +75 -0
codedebrief/analysis/languages/rust.py +96 -0
codedebrief/analysis/project.py +373 -0
codedebrief/analysis/python.py +939 -0
codedebrief/analysis/registry.py +320 -0
codedebrief/analysis/treesitter.py +884 -0
codedebrief/analysis/typescript.py +1019 -0
codedebrief/artifacts.py +49 -0
codedebrief/cli.py +585 -0
codedebrief/config.py +226 -0
codedebrief/doctor.py +175 -0
codedebrief/install.py +441 -0
codedebrief/mcp_server.py +2720 -0
codedebrief/model.py +189 -0
codedebrief/py.typed +1 -0
codedebrief/quality.py +392 -0
codedebrief/query.py +641 -0
codedebrief/render/__init__.py +6 -0
codedebrief/render/assets/generated/codedebrief-viewer-runtime.iife.js +10 -0
codedebrief/render/assets/panels.js +462 -0
codedebrief/render/assets/shell.js +1649 -0
codedebrief/render/assets/styles.css +1715 -0
codedebrief/render/assets/tree.js +616 -0
codedebrief/render/html.py +191 -0
codedebrief/render/markdown.py +153 -0
codedebrief/render/payload.py +326 -0
codedebrief/render/snapshot.py +769 -0
codedebrief/schema/codedebrief.schema.json +449 -0
codedebrief/util.py +65 -0
codedebrief/validation.py +214 -0
codedebrief-0.11.0.dist-info/METADATA +426 -0
codedebrief-0.11.0.dist-info/RECORD +48 -0
codedebrief-0.11.0.dist-info/WHEEL +4 -0
codedebrief-0.11.0.dist-info/entry_points.txt +2 -0
codedebrief-0.11.0.dist-info/licenses/LICENSE +176 -0
codedebrief-0.11.0.dist-info/licenses/NOTICE +9 -0

codedebrief/model.py ADDED Viewed

@@ -0,0 +1,189 @@
+from __future__ import annotations
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from pathlib import Path
+from typing import Any
class Evidence(str, Enum):
    """Evidence level attached to flow nodes and edges.

    The ``str`` mixin makes each member compare equal to its plain string
    value, and ``Evidence("VERIFIED")`` looks a member up by that value.
    """

    VERIFIED = "VERIFIED"
    INFERRED = "INFERRED"
    POTENTIAL_GAP = "POTENTIAL_GAP"
class NodeKind(str, Enum):
    """Kind tag carried by every flow node.

    Members are ``str`` subclasses whose values are the lower-case names used
    when a node is rebuilt from a plain dict (``NodeKind(data["kind"])``).
    """

    ENTRY = "entry"
    ACTION = "action"
    DECISION = "decision"
    CALL = "call"
    TERMINAL = "terminal"
    ERROR = "error"
+@dataclass(slots=True)
+class SourceLocation:
+    path: str
+    start_line: int
+    end_line: int
@dataclass(slots=True)
class FlowNode:
    """One node of a flow graph."""

    # Identifier of the node.
    id: str
    # Kind tag, e.g. NodeKind.CALL or NodeKind.DECISION.
    kind: NodeKind
    # Display label of the node.
    label: str
    # Source span the node was derived from.
    location: SourceLocation
    # Evidence level; VERIFIED unless stated otherwise.
    evidence: Evidence = Evidence.VERIFIED
    # Optional free-form detail text.
    detail: str = ""
    # Free-form extras. The quality metrics read "target_flow",
    # "call_candidates" and "link_confidence" from call nodes.
    metadata: dict[str, Any] = field(default_factory=dict)
@dataclass(slots=True)
class FlowEdge:
    """A directed connection between two nodes, referenced by identifier."""

    # Identifier of the edge.
    id: str
    # Identifier of the node the edge starts at.
    source: str
    # Identifier of the node the edge ends at.
    target: str
    # Optional label for the edge.
    label: str = ""
    # Evidence level; VERIFIED unless stated otherwise.
    evidence: Evidence = Evidence.VERIFIED
+@dataclass(slots=True)
+class Flow:
+    id: str
+    name: str
+    symbol: str
+    language: str
+    framework: str
+    entry_kind: str
+    is_entrypoint: bool
+    location: SourceLocation
+    nodes: list[FlowNode] = field(default_factory=list)
+    edges: list[FlowEdge] = field(default_factory=list)
+    calls: list[str] = field(default_factory=list)
+    called_by: list[str] = field(default_factory=list)
+    tests: list[str] = field(default_factory=list)
+    metadata: dict[str, Any] = field(default_factory=dict)
+@dataclass(slots=True)
+class FileRecord:
+    path: str
+    language: str
+    sha256: str
+    flow_ids: list[str] = field(default_factory=list)
+    dependencies: list[str] = field(default_factory=list)
+@dataclass(slots=True)
+class FileAnalysis:
+    path: str
+    language: str
+    sha256: str
+    flows: list[Flow] = field(default_factory=list)
+    enums: dict[str, list[str]] = field(default_factory=dict)
+    constants: dict[str, bool] = field(default_factory=dict)
+    dependencies: list[str] = field(default_factory=list)
+    def to_dict(self) -> dict[str, Any]:
+        return asdict(self)
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> FileAnalysis:
+        return cls(
+            path=data["path"],
+            language=data["language"],
+            sha256=data["sha256"],
+            flows=[_flow_from_dict(item) for item in data.get("flows", [])],
+            enums=data.get("enums", {}),
+            constants=data.get("constants", {}),
+            dependencies=data.get("dependencies", []),
+        )
+@dataclass(slots=True)
+class ProjectModel:
+    schema_version: str
+    generated_at: str
+    root: str
+    flows: list[Flow] = field(default_factory=list)
+    files: list[FileRecord] = field(default_factory=list)
+    metadata: dict[str, Any] = field(default_factory=dict)
+    @classmethod
+    def empty(cls, root: Path) -> ProjectModel:
+        return cls(
+            schema_version="2.0",
+            generated_at=datetime.now(timezone.utc).isoformat(),
+            root=str(root.resolve()),
+        )
+    def to_dict(self) -> dict[str, Any]:
+        return asdict(self)
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> ProjectModel:
+        # Loading a committed `codedebrief.json` deserializes untrusted JSON, so a malformed
+        # shape must surface as a clean ValueError, not a raw KeyError / TypeError traceback
+        # leaking to the CLI or the MCP transport.
+        if not isinstance(data, dict):
+            raise ValueError("malformed codedebrief.json: expected a JSON object at the top level")
+        try:
+            flows = [_flow_from_dict(item) for item in data.get("flows", [])]
+            files = [FileRecord(**item) for item in data.get("files", [])]
+            return cls(
+                schema_version=data["schema_version"],
+                generated_at=data["generated_at"],
+                root=data["root"],
+                flows=flows,
+                files=files,
+                metadata=data.get("metadata", {}),
+            )
+        except (KeyError, TypeError, ValueError) as error:
+            raise ValueError(f"malformed codedebrief.json: {error}") from error
def _location_from_dict(data: dict[str, Any]) -> SourceLocation:
    """Build a ``SourceLocation`` by passing the mapping's entries as keyword arguments."""
    location = SourceLocation(**data)
    return location
def _node_from_dict(data: dict[str, Any]) -> FlowNode:
    """Rebuild a ``FlowNode`` from its plain-dict form.

    ``id``, ``kind``, ``label`` and ``location`` are required. ``evidence``
    defaults to VERIFIED, ``detail`` to "" and ``metadata`` to an empty dict.
    """
    # Values are read in field order so the first malformed entry is the one reported.
    node_id = data["id"]
    kind = NodeKind(data["kind"])
    label = data["label"]
    location = _location_from_dict(data["location"])
    evidence = Evidence(data.get("evidence", Evidence.VERIFIED.value))
    detail = data.get("detail", "")
    metadata = data.get("metadata", {})
    return FlowNode(node_id, kind, label, location, evidence, detail, metadata)
def _edge_from_dict(data: dict[str, Any]) -> FlowEdge:
    """Rebuild a ``FlowEdge`` from its plain-dict form.

    ``id``, ``source`` and ``target`` are required. ``label`` defaults to ""
    and ``evidence`` to VERIFIED.
    """
    # Values are read in field order so the first malformed entry is the one reported.
    edge_id = data["id"]
    source = data["source"]
    target = data["target"]
    label = data.get("label", "")
    evidence = Evidence(data.get("evidence", Evidence.VERIFIED.value))
    return FlowEdge(edge_id, source, target, label, evidence)
def _flow_from_dict(data: dict[str, Any]) -> Flow:
    """Rebuild a ``Flow``, including its nodes and edges, from its plain-dict form.

    ``id``, ``name``, ``symbol``, ``language`` and ``location`` are required.
    ``framework`` defaults to "generic", ``entry_kind`` to "function" and
    ``is_entrypoint`` to False. The list fields and ``metadata`` default to
    empty containers.
    """
    # Values are read in field order so the first malformed entry is the one reported.
    flow_id = data["id"]
    name = data["name"]
    symbol = data["symbol"]
    language = data["language"]
    framework = data.get("framework", "generic")
    entry_kind = data.get("entry_kind", "function")
    is_entrypoint = data.get("is_entrypoint", False)
    location = _location_from_dict(data["location"])
    nodes = [_node_from_dict(raw) for raw in data.get("nodes", [])]
    edges = [_edge_from_dict(raw) for raw in data.get("edges", [])]
    return Flow(
        flow_id,
        name,
        symbol,
        language,
        framework,
        entry_kind,
        is_entrypoint,
        location,
        nodes,
        edges,
        data.get("calls", []),
        data.get("called_by", []),
        data.get("tests", []),
        data.get("metadata", {}),
    )

codedebrief/py.typed ADDED Viewed

	@@ -0,0 +1 @@
1	+

codedebrief/quality.py ADDED Viewed

@@ -0,0 +1,392 @@
+from __future__ import annotations
+from collections import Counter
+from typing import Any
+from codedebrief.model import Flow, FlowNode, NodeKind, ProjectModel
# Normalized (lower-cased, whitespace-collapsed) labels that count as generic for any node kind.
GENERIC_LABELS = {
    "action",
    "branch",
    "call",
    "condition",
    "raise",
    "return",
    "unknown",
}
# Lower-cased "link_confidence" values that mark a call node as low-confidence.
LOW_CONFIDENCE = {"none", "low"}
# A non-test flow with at least this many nodes is listed as huge.
HUGE_FLOW_NODE_THRESHOLD = 60
# An edge-to-node ratio at or above this value sets the dense-graph warning.
DENSE_EDGE_RATIO_THRESHOLD = 2.6
def model_quality(model: ProjectModel) -> dict[str, Any]:
    """Deterministic analyzer-quality metrics derived from one persisted model.

    The result has the sections ``files``, ``flows``, ``calls``, ``languages``,
    ``labels``, ``source_locations`` and ``graph``. Flows whose metadata has a
    truthy ``"test"`` entry are excluded from the non-test, entrypoint,
    per-language and per-entry-kind flow counts, from the huge-flow list and
    from the per-language depth. All other totals cover every flow.
    """
    every_flow = model.flows
    production_flows = [flow for flow in every_flow if not flow.metadata.get("test")]

    # One pass over all flows for the graph totals and the call nodes.
    node_total = 0
    edge_total = 0
    call_nodes: list[FlowNode] = []
    for flow in every_flow:
        node_total += len(flow.nodes)
        edge_total += len(flow.edges)
        call_nodes.extend(node for node in flow.nodes if node.kind is NodeKind.CALL)

    # Sort each call node into the (overlapping) resolution buckets.
    resolved: list[FlowNode] = []
    ambiguous: list[FlowNode] = []
    unresolved: list[FlowNode] = []
    low_confidence: list[FlowNode] = []
    for node in call_nodes:
        meta = node.metadata
        has_target = bool(meta.get("target_flow"))
        if has_target:
            resolved.append(node)
        if len(meta.get("call_candidates", [])) > 1:
            ambiguous.append(node)
        if not has_target and not meta.get("call_candidates"):
            unresolved.append(node)
        if str(meta.get("link_confidence", "")).lower() in LOW_CONFIDENCE:
            low_confidence.append(node)

    generic_labels = _generic_label_nodes(every_flow)
    source_locations = _source_location_nodes(every_flow)
    skipped_files = _skipped_files(model)
    parse_error_files = _parse_error_files(every_flow)

    huge_flows = []
    for flow in production_flows:
        size = len(flow.nodes)
        if size < HUGE_FLOW_NODE_THRESHOLD:
            continue
        huge_flows.append(
            {
                "flow_id": flow.id,
                "name": flow.name,
                "nodes": size,
                "source": f"{flow.location.path}:{flow.location.start_line}",
            }
        )

    edge_ratio = round(edge_total / node_total, 2) if node_total else 0.0

    files_section = {
        "total": len(model.files),
        "by_language": dict(Counter(record.language for record in model.files)),
        "empty": sum(1 for record in model.files if not record.flow_ids),
        "skipped": {
            "total": len(skipped_files),
            "by_reason": dict(Counter(item["reason"] for item in skipped_files)),
            "sample": skipped_files[:20],
        },
        "parse_errors": {
            "total": len(parse_error_files),
            "by_language": dict(Counter(item["language"] for item in parse_error_files)),
            "sample": parse_error_files[:20],
        },
    }
    flows_section = {
        "total": len(every_flow),
        "non_test": len(production_flows),
        "entrypoints": sum(flow.is_entrypoint for flow in production_flows),
        "by_language": dict(Counter(flow.language for flow in production_flows)),
        "by_entry_kind": dict(Counter(flow.entry_kind for flow in production_flows)),
        "per_file": _flow_distribution(every_flow),
        "huge": huge_flows[:20],
    }
    calls_section = {
        "total": len(call_nodes),
        "resolved": len(resolved),
        "unresolved": len(unresolved),
        "ambiguous": len(ambiguous),
        "low_confidence": len(low_confidence),
        "resolution_rate": _ratio(len(resolved), len(call_nodes)),
    }
    languages_section = _language_depth(
        model,
        non_test_flows=production_flows,
        resolved_calls=resolved,
        unresolved_calls=unresolved,
        generic_labels=generic_labels,
        skipped_files=skipped_files,
        parse_error_files=parse_error_files,
    )
    labels_section = {
        "generic_nodes": len(generic_labels),
        "generic_ratio": _ratio(len(generic_labels), node_total),
        "sample": generic_labels[:20],
    }
    source_section = {
        "nodes_with_source": len(source_locations),
        "coverage": _ratio(len(source_locations), node_total),
    }
    graph_section = {
        "nodes": node_total,
        "edges": edge_total,
        "edge_to_node_ratio": edge_ratio,
        "dense_graph_warning": edge_ratio >= DENSE_EDGE_RATIO_THRESHOLD,
    }
    return {
        "files": files_section,
        "flows": flows_section,
        "calls": calls_section,
        "languages": languages_section,
        "labels": labels_section,
        "source_locations": source_section,
        "graph": graph_section,
    }
def render_quality(quality: dict[str, Any]) -> str:
    """Render a ``model_quality`` result as a multi-line plain-text report.

    The summary lines are always present. Each optional section (language
    depth, attention signals, huge flows, skipped files, parse warnings,
    generic labels, density warning) appears only when it has data, and lists
    at most five entries.
    """
    files = quality["files"]
    flows = quality["flows"]
    calls = quality["calls"]
    labels = quality["labels"]
    source = quality["source_locations"]
    graph = quality["graph"]

    # "languages" is optional; anything other than a dict is treated as absent.
    languages = quality.get("languages", {})
    if isinstance(languages, dict):
        language_depth = languages.get("depth", {})
        attention = languages.get("attention", [])
    else:
        language_depth = {}
        attention = []

    report = ["Analysis quality:"]
    report.append(f"- Files: {files['total']} ({_format_counts(files['by_language'])})")
    report.append(f"- Skipped files: {files['skipped']['total']}")
    report.append(f"- Parse warnings: {files.get('parse_errors', {}).get('total', 0)}")
    report.append(
        f"- Flows: {flows['total']} total, {flows['entrypoints']} entrypoints "
        f"({_format_counts(flows['by_language'])})"
    )
    report.append(
        f"- Calls: {calls['resolved']}/{calls['total']} resolved "
        f"({calls['resolution_rate']:.0%}); {calls['unresolved']} unresolved, "
        f"{calls['ambiguous']} ambiguous, {calls['low_confidence']} low-confidence"
    )
    report.append(
        f"- Labels: {labels['generic_nodes']} generic nodes ({labels['generic_ratio']:.0%})"
    )
    report.append(
        f"- Source coverage: {source['nodes_with_source']} nodes ({source['coverage']:.0%})"
    )
    report.append(
        f"- Graph density: {graph['edges']} edges / {graph['nodes']} nodes "
        f"({graph['edge_to_node_ratio']})"
    )

    if language_depth:
        report.append(f"- Language depth: {len(language_depth)} observed language(s)")
        for language, metrics in sorted(language_depth.items())[:5]:
            report.append(_format_language_depth(language, metrics))

    if attention:
        report.append("- Language attention signals:")
        for item in attention[:5]:
            report.append(f"  - {item['language']}: {', '.join(item['signals'])}")

    if flows["huge"]:
        report.append("- Huge flows:")
        for item in flows["huge"][:5]:
            report.append(f"  - {item['name']} ({item['nodes']} nodes, {item['source']})")

    if files["skipped"]["sample"]:
        report.append("- Skipped file samples:")
        for item in files["skipped"]["sample"][:5]:
            report.append(f"  - {item['path']} ({item['reason']})")

    parse_errors = files.get("parse_errors", {})
    if isinstance(parse_errors, dict) and parse_errors.get("sample"):
        report.append("- Parse warning samples:")
        for item in parse_errors["sample"][:5]:
            report.append(f"  - {item['path']}:{item['line']} ({item['reason']})")

    if labels["sample"]:
        report.append("- Generic label samples:")
        for item in labels["sample"][:5]:
            report.append(f"  - {item['label']} ({item['source']})")

    if graph["dense_graph_warning"]:
        report.append("- Warning: graph edge density is high; inspect layout and call resolution.")

    return "\n".join(report)
def _flow_distribution(flows: list[Flow]) -> dict[str, Any]:
    """Summarize how many flows each source file holds.

    Flows are grouped by ``flow.location.path``. The result gives the smallest
    and largest group size and the mean group size rounded to two decimals,
    or all zeros when there are no flows.
    """
    per_path = Counter(flow.location.path for flow in flows)
    if not per_path:
        return {"min": 0, "max": 0, "avg": 0.0}
    sizes = list(per_path.values())
    mean_size = sum(sizes) / len(sizes)
    return {"min": min(sizes), "max": max(sizes), "avg": round(mean_size, 2)}
def _language_depth(
    model: ProjectModel,
    *,
    non_test_flows: list[Flow],
    resolved_calls: list[FlowNode],
    unresolved_calls: list[FlowNode],
    generic_labels: list[dict[str, Any]],
    skipped_files: list[dict[str, str]],
    parse_error_files: list[dict[str, Any]],
) -> dict[str, Any]:
    """Build per-language depth metrics and the list of languages needing attention.

    Args:
        model: Source of the file records and of ``metadata["language_capabilities"]``.
        non_test_flows: Flows that every flow-, node- and call-based metric is computed from.
        resolved_calls: Call nodes already classified as resolved (matched by identity).
        unresolved_calls: Call nodes already classified as unresolved (matched by identity).
        generic_labels: Generic-label rows. Rows carrying a ``flow_id`` are counted only
            when that flow is in ``non_test_flows``.
        skipped_files: Normalized skipped-file rows.
        parse_error_files: Parse-error rows.

    Returns:
        ``{"depth": {language: metrics}, "attention": [{"language", "signals"}, ...]}``,
        with languages in sorted order and the empty language name dropped.
    """
    file_counts = Counter(record.language for record in model.files)
    files_with_flows = Counter(
        record.language for record in model.files if getattr(record, "flow_ids", [])
    )
    flow_counts = Counter(flow.language for flow in non_test_flows)
    entrypoint_counts = Counter(flow.language for flow in non_test_flows if flow.is_entrypoint)
    decision_counts = Counter(
        flow.language
        for flow in non_test_flows
        for node in flow.nodes
        if node.kind is NodeKind.DECISION
    )
    # Nodes are matched by object identity against the caller's classification.
    resolved_ids = {id(node) for node in resolved_calls}
    unresolved_ids = {id(node) for node in unresolved_calls}
    call_counts: Counter[str] = Counter()
    resolved_counts: Counter[str] = Counter()
    unresolved_counts: Counter[str] = Counter()
    source_counts: Counter[str] = Counter()
    node_counts: Counter[str] = Counter()
    for flow in non_test_flows:
        node_counts[flow.language] += len(flow.nodes)
        for node in flow.nodes:
            if node.kind is NodeKind.CALL:
                call_counts[flow.language] += 1
                if id(node) in resolved_ids:
                    resolved_counts[flow.language] += 1
                if id(node) in unresolved_ids:
                    unresolved_counts[flow.language] += 1
            if node.location.path and node.location.start_line > 0 and node.location.end_line > 0:
                source_counts[flow.language] += 1
    # The generic-label rows may include nodes of test flows, but the ratio's denominator
    # covers non-test flows only. Restrict the numerator to the same population so the
    # ratio cannot exceed 1 or raise a false "generic_labels" signal.
    non_test_flow_ids = {flow.id for flow in non_test_flows}
    generic_counts = Counter(
        _sample_language(item)
        for item in generic_labels
        if "flow_id" not in item or item["flow_id"] in non_test_flow_ids
    )
    skipped_counts = Counter(item.get("language", "") for item in skipped_files)
    parse_error_counts = Counter(item.get("language", "") for item in parse_error_files)
    capabilities = model.metadata.get("language_capabilities", {})
    if not isinstance(capabilities, dict):
        # Metadata is free-form; a non-dict value means no capability information.
        capabilities = {}
    languages = sorted(
        {
            *file_counts.keys(),
            *flow_counts.keys(),
            *skipped_counts.keys(),
            *parse_error_counts.keys(),
        }
        - {""}
    )
    depth: dict[str, dict[str, Any]] = {}
    attention: list[dict[str, Any]] = []
    for language in languages:
        files = file_counts[language]
        flows = flow_counts[language]
        calls = call_counts[language]
        resolved = resolved_counts[language]
        skipped = skipped_counts[language]
        parse_errors = parse_error_counts[language]
        nodes = node_counts[language]
        metrics = {
            "files": files,
            "files_with_flows": files_with_flows[language],
            "flow_file_coverage": _ratio(files_with_flows[language], files),
            "flows": flows,
            "entrypoints": entrypoint_counts[language],
            "decisions": decision_counts[language],
            "calls": calls,
            "resolved_calls": resolved,
            "unresolved_calls": unresolved_counts[language],
            "call_resolution_rate": _ratio(resolved, calls),
            "generic_nodes": generic_counts[language],
            "generic_ratio": _ratio(generic_counts[language], nodes),
            "source_coverage": _ratio(source_counts[language], nodes),
            "skipped_files": skipped,
            "parse_error_files": parse_errors,
            "capability": capabilities.get(language, {}),
        }
        signals = _language_attention_signals(metrics)
        if signals:
            attention.append({"language": language, "signals": signals})
        depth[language] = metrics
    return {"depth": depth, "attention": attention}
def _language_attention_signals(metrics: dict[str, Any]) -> list[str]:
    """Return the names of the attention signals that fire for one language's metrics.

    Signals keep a fixed order: skipped files, parse errors, files without
    flows, call resolution below 50%, generic-label ratio of 20% or more,
    and source coverage below 90%.
    """
    # Each check is a thunk so a rate is read only when its guarding count is truthy.
    checks = (
        ("skipped_files", lambda: metrics["skipped_files"]),
        ("parse_errors", lambda: metrics.get("parse_error_files")),
        ("no_flow_files", lambda: metrics["files"] and not metrics["files_with_flows"]),
        (
            "low_call_resolution",
            lambda: metrics["calls"] and metrics["call_resolution_rate"] < 0.5,
        ),
        ("generic_labels", lambda: metrics["generic_ratio"] >= 0.2),
        ("low_source_coverage", lambda: metrics["flows"] and metrics["source_coverage"] < 0.9),
    )
    return [name for name, fires in checks if fires()]
def _sample_language(item: dict[str, Any]) -> str:
    """Return the row's ``"language"`` value as a string, or "" when the key is missing."""
    language = item.get("language", "")
    return str(language)
def _format_language_depth(language: str, metrics: dict[str, Any]) -> str:
    """Format one language's depth metrics as an indented report bullet."""
    parts = [
        f"{metrics['files']} files",
        f"{metrics['flows']} flows",
        f"{metrics['decisions']} decisions",
        f"{metrics['resolved_calls']}/{metrics['calls']} calls resolved",
    ]
    return f"  - {language}: " + ", ".join(parts)
def _generic_label_nodes(flows: list[Flow]) -> list[dict[str, Any]]:
    """List one row per node whose label ``_generic_label`` reports as generic.

    Rows keep flow order, then node order. Each row holds the flow id, node
    id, original label, the flow's language and a ``path:start_line`` source.
    """
    return [
        {
            "flow_id": flow.id,
            "node_id": node.id,
            "label": node.label,
            "language": flow.language,
            "source": f"{node.location.path}:{node.location.start_line}",
        }
        for flow in flows
        for node in flow.nodes
        if _generic_label(node)
    ]
def _generic_label(node: FlowNode) -> bool:
    """Tell whether a node's label is too generic to be informative.

    The label is lower-cased and its whitespace collapsed first. It is
    generic when it is in ``GENERIC_LABELS``, when a call node's label is
    "call" plus a single word, or when an action node's label is "do work",
    "handle" or "process".
    """
    normalized = " ".join(node.label.lower().split())
    if normalized in GENERIC_LABELS:
        return True
    if node.kind is NodeKind.CALL and normalized.startswith("call "):
        word_count = len(normalized.split())
        return word_count <= 2
    if node.kind is not NodeKind.ACTION:
        return False
    return normalized in {"do work", "handle", "process"}
def _source_location_nodes(flows: list[Flow]) -> list[FlowNode]:
    """Collect the nodes that have a non-empty path and positive start and end lines."""
    located: list[FlowNode] = []
    for flow in flows:
        for node in flow.nodes:
            span = node.location
            if not span.path:
                continue
            if span.start_line > 0 and span.end_line > 0:
                located.append(node)
    return located
def _parse_error_files(flows: list[Flow]) -> list[dict[str, Any]]:
    """Collect one parse-error row per file from the flows' ``parse_error`` metadata.

    A flow contributes only when ``flow.metadata["parse_error"]`` is a dict.
    The first flow seen for a path wins, and rows are returned sorted by path.
    Missing or falsy fields fall back to the flow's own path, language and
    start line, to ``"ERROR"`` for ``kind`` and to
    ``"tree-sitter parse warning"`` for ``reason``.

    The metadata comes from a persisted model, so a ``line`` value that cannot
    be converted to an integer falls back to the flow's start line (or 0)
    instead of aborting the whole report.
    """
    by_path: dict[str, dict[str, Any]] = {}
    for flow in flows:
        parse_error = flow.metadata.get("parse_error")
        if not isinstance(parse_error, dict):
            continue
        path = str(parse_error.get("path") or flow.location.path)
        # Skip empty paths and paths already recorded by an earlier flow.
        if not path or path in by_path:
            continue
        line = 0
        for candidate in (parse_error.get("line"), flow.location.start_line):
            if not candidate:
                continue
            try:
                line = int(candidate)
            except (TypeError, ValueError, OverflowError):
                # Unusable value: try the next candidate.
                continue
            break
        by_path[path] = {
            "path": path,
            "language": str(parse_error.get("language") or flow.language),
            "line": line,
            "kind": str(parse_error.get("kind") or "ERROR"),
            "reason": str(parse_error.get("reason") or "tree-sitter parse warning"),
        }
    return [by_path[path] for path in sorted(by_path)]
def _skipped_files(model: ProjectModel) -> list[dict[str, str]]:
    """Normalize ``model.metadata["skipped_files"]`` into path/language/reason rows.

    Returns an empty list when the value is not a list. Entries that are not
    dicts, or whose ``path`` or ``reason`` is not a string, are dropped. A
    non-string ``language`` becomes "".
    """
    rows = model.metadata.get("skipped_files", [])
    if not isinstance(rows, list):
        return []
    kept: list[dict[str, str]] = []
    for entry in rows:
        if not isinstance(entry, dict):
            continue
        path = entry.get("path")
        reason = entry.get("reason")
        if not (isinstance(path, str) and isinstance(reason, str)):
            continue
        language = entry.get("language")
        kept.append(
            {
                "path": path,
                "language": language if isinstance(language, str) else "",
                "reason": reason,
            }
        )
    return kept
def _ratio(numerator: int, denominator: int) -> float:
    """Return ``numerator / denominator`` rounded to four decimals, or 0.0 for a zero denominator."""
    if not denominator:
        return 0.0
    return round(numerator / denominator, 4)
def _format_counts(counts: dict[str, int]) -> str:
    """Render counts as ``key=value`` pairs sorted by key, or "none" when empty."""
    rendered = ", ".join(f"{name}={counts[name]}" for name in sorted(counts))
    return rendered if counts else "none"