PyPI - codeboarding - Versions diffs - 0.11.0__tar.gz → 0.12.0__tar.gz - Mend

codeboarding 0.11.0__tar.gz → 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (206) hide show

{codeboarding-0.11.0/codeboarding.egg-info → codeboarding-0.12.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: codeboarding
-Version: 0.11.0
+Version: 0.12.0
 Summary: Interactive Diagrams for Code
 Author: CodeBoarding Team
 License-Expression: MIT
@@ -18,7 +18,6 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: docker>=7.1
 Requires-Dist: dotenv>=0.9
-Requires-Dist: duckdb>=1.3
 Requires-Dist: dulwich>=0.22
 Requires-Dist: fastapi>=0.115
 Requires-Dist: filelock>=3.12
@@ -34,6 +33,7 @@ Requires-Dist: langchain-community>=0.4
 Requires-Dist: langchain-google-genai>=3.1
 Requires-Dist: langchain-ollama>=1.0
 Requires-Dist: langchain-openai>=1.1
+Requires-Dist: leidenalg>=0.10
 Requires-Dist: markdown>=3.8
 Requires-Dist: markdown-it-py>=3.0
 Requires-Dist: markitdown>=0.1

{codeboarding-0.11.0 → codeboarding-0.12.0}/README.md RENAMED Viewed

@@ -143,7 +143,7 @@ python main.py full https://github.com/pytorch/pytorch
 ## Supported stack
-- Languages: Python, TypeScript, JavaScript, Java, Go, PHP, Rust.
+- Languages: Python, TypeScript, JavaScript, Java, Go, PHP, Rust, C#.
 - LLM providers: OpenAI, Anthropic, Google, Vercel AI Gateway, AWS Bedrock, Ollama, OpenRouter, and more.
 ## Examples

{codeboarding-0.11.0 → codeboarding-0.12.0}/agents/agent.py RENAMED Viewed

@@ -5,7 +5,7 @@ from pathlib import Path
 from google.api_core.exceptions import ResourceExhausted
 from langchain_core.exceptions import OutputParserException
 from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
+from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
 from langchain_core.output_parsers import PydanticOutputParser
 from langchain_core.prompts import PromptTemplate
 from langchain.agents import create_agent
@@ -20,6 +20,7 @@ from agents.tools.toolkit import CodeBoardingToolkit
 from agents.validation import ValidationResult, score_validation_results, VALIDATOR_WEIGHTS, DEFAULT_VALIDATOR_WEIGHT
 from monitoring.mixin import MonitoringMixin
 from repo_utils.ignore import RepoIgnoreManager
+from agents.agent_responses import LLMBaseModel
 from agents.llm_config import MONITORING_CALLBACK
 from static_analyzer.analysis_result import StaticAnalysisResults
 from static_analyzer.reference_resolve_mixin import ReferenceResolverMixin
@@ -43,10 +44,10 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
         ReferenceResolverMixin.__init__(self, repo_dir, static_analysis)
         MonitoringMixin.__init__(self)
         self.parsing_llm = parsing_llm
+        self.agent_llm = agent_llm
         self.repo_dir = repo_dir
         self.ignore_manager = RepoIgnoreManager(repo_dir)
-        # Initialize the professional toolkit
         context = RepoContext(repo_dir=repo_dir, ignore_manager=self.ignore_manager, static_analysis=static_analysis)
         self.toolkit = CodeBoardingToolkit(context=context)
@@ -200,10 +201,10 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
         except Empty:
             raise RuntimeError("Agent invocation completed but no result was returned")
-    def _parse_invoke(self, prompt: str, type: type):
+    def _parse_invoke(self, prompt: str, type: type, include_hidden: bool = False):
         response = self._invoke(prompt)
         assert isinstance(response, str), f"Expected a string as response type got {response}"
-        return self._parse_response(prompt, response, type)
+        return self._parse_response(prompt, response, type, include_hidden=include_hidden)
     def _score_result(self, result, validators: list, context) -> tuple[float, list[tuple[float, str]]]:
         """Run all validators on a result and return (score, prioritized_feedback).
@@ -233,7 +234,13 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
         return score, weighted_feedback
     def _validation_invoke(
-        self, prompt: str, return_type: type, validators: list, context, max_validation_attempts: int = 1
+        self,
+        prompt: str,
+        return_type: type,
+        validators: list,
+        context,
+        max_validation_attempts: int = 1,
+        include_hidden: bool = False,
     ):
         """
         Invoke LLM with validation, feedback loop, and best-of-N selection.
@@ -261,7 +268,12 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
         # Compute the maximum possible score so we can detect a perfect result
         max_possible_score = sum(VALIDATOR_WEIGHTS.get(v.__name__, DEFAULT_VALIDATOR_WEIGHT) for v in validators)
-        result = self._parse_invoke(prompt, return_type)
+        result = self._parse_invoke(prompt, return_type, include_hidden=include_hidden)
+        logger.info(
+            "[Validation] Parsed %s: %s",
+            return_type.__name__,
+            result.llm_str()[:500],
+        )
         # Track the best candidate across all attempts
         best_result = result
@@ -314,42 +326,33 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
                 f"[Validation] Preparing attempt {attempt + 1}/{max_validation_attempts} "
                 f"with {len(weighted_feedback)} feedback items"
             )
-            result = self._parse_invoke(feedback_prompt, return_type)
+            result = self._parse_invoke(feedback_prompt, return_type, include_hidden=include_hidden)
         return best_result
-    def _parse_response(self, prompt, response, return_type, max_retries=5, attempt=0):
+    def _parse_response(self, prompt, response, return_type, max_retries=5, attempt=0, include_hidden: bool = False):
         if response is None or response.strip() == "":
             logger.error(f"Empty response for prompt: {prompt}")
+        if include_hidden and issubclass(return_type, LLMBaseModel):
+            schema = return_type.model_json_schema(include_hidden=True)
+            parser = PydanticOutputParser(pydantic_object=return_type)
+            format_instructions = (
+                f"The output should be formatted as a JSON instance that conforms to the JSON schema below.\n"
+                f"Here is the output schema:\n```json\n{json.dumps(schema, indent=2)}\n```"
+            )
+        else:
+            parser = PydanticOutputParser(pydantic_object=return_type)
+            format_instructions = parser.get_format_instructions()
         def call_once():
-            # Extractor is rebuilt on every attempt — previous trustcall state
-            # may have corrupted attributes (see the tool_call_id bug below).
-            extractor = create_extractor(self.parsing_llm, tools=[return_type], tool_choice=return_type.__name__)
             try:
-                result = extractor.invoke(
-                    return_type.extractor_str() + response,
-                    config={"callbacks": [MONITORING_CALLBACK, self.agent_monitoring_callback]},
-                )
-            except AttributeError as e:
-                # Trustcall bug: https://github.com/hinthornw/trustcall/issues/47
-                # 'ExtractionState' object has no attribute 'tool_call_id' during validation retry.
-                # Treat as a non-retriable fallback to the Pydantic parser.
-                if "tool_call_id" in str(e):
-                    logger.warning(f"Trustcall bug encountered, falling back to Pydantic parser: {e}")
-                    parser = PydanticOutputParser(pydantic_object=return_type)
-                    return self._try_parse(response, parser)
-                raise
-            if "responses" in result and len(result["responses"]) != 0:
-                return return_type.model_validate(result["responses"][0])
-            if "messages" in result and len(result["messages"]) != 0:
-                message = result["messages"][0].content
-                parser = PydanticOutputParser(pydantic_object=return_type)
-                if not message:
-                    raise EmptyExtractorMessageError("Extractor returned empty message content")
-                return self._try_parse(message, parser)
-            parser = PydanticOutputParser(pydantic_object=return_type)
-            return self._try_parse(response, parser)
+                result = self._structured_parse(response, parser, format_instructions=format_instructions)
+                logger.debug("[parse_response] structured_parse succeeded for %s", return_type.__name__)
+                return result
+            except Exception as e:
+                logger.warning("[parse_response] structured_parse failed for %s: %s", return_type.__name__, e)
+            return self._extractor_parse(response, return_type, parser, include_hidden=include_hidden)
         def classify(exc: Exception, attempt: int) -> RetryDecision:
             if isinstance(exc, ResourceExhausted):
@@ -359,20 +362,15 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
                 )
             if isinstance(exc, (EmptyExtractorMessageError, IndexError, json.JSONDecodeError, ValueError)):
                 return RetryDecision(action=RetryAction.RETRY_NOW)
-            # AttributeError (non-tool_call_id) and any other exception: give up.
             return RetryDecision(action=RetryAction.GIVE_UP)
         def on_exhausted(exc: Exception):
-            # Preserve historic shape: ResourceExhausted surfaces the original exception;
-            # parse-error exhaustion wraps with a descriptive message naming the response.
             if isinstance(exc, ResourceExhausted):
                 logger.error(f"Resource exhausted on final parsing attempt: {exc}")
                 raise exc
             logger.error(f"Max retries ({max_retries}) reached for parsing response: {response}")
             raise Exception(f"Max retries reached for parsing response: {response}")
-        # ``attempt`` kwarg kept for backwards-compat with callers that passed it;
-        # the effective attempt count is ``max_retries - attempt``.
         return with_retries(
             call_once,
             max_attempts=max(1, max_retries - attempt),
@@ -381,19 +379,21 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
             log_prefix="Parse response",
         )
-    def _try_parse(self, message_content, parser):
-        try:
-            prompt_template = """You are an JSON expert. Here you need to extract information in the following json format: {format_instructions}
+    def _structured_parse(self, message_content, parser, format_instructions: str | None = None):
+        if format_instructions is None:
+            format_instructions = parser.get_format_instructions()
+        prompt_template = """You are a JSON expert. Here you need to extract information in the following json format: {format_instructions}
-            Here is the content to parse and fix: {adjective}
+        Here is the content to parse and fix: {adjective}
-            Please provide only the JSON output without any additional text."""
-            prompt = PromptTemplate(
-                template=prompt_template,
-                input_variables=["adjective"],
-                partial_variables={"format_instructions": parser.get_format_instructions()},
-            )
-            chain = prompt | self.parsing_llm | parser
+        Please provide only the JSON output without any additional text."""
+        prompt = PromptTemplate(
+            template=prompt_template,
+            input_variables=["adjective"],
+            partial_variables={"format_instructions": format_instructions},
+        )
+        chain = prompt | self.parsing_llm | parser
+        try:
             return chain.invoke(
                 {"adjective": message_content},
                 config={"callbacks": [MONITORING_CALLBACK, self.agent_monitoring_callback]},
@@ -401,7 +401,28 @@ class CodeBoardingAgent(ReferenceResolverMixin, MonitoringMixin):
         except (ValidationError, OutputParserException):
             for _, v in json.loads(message_content).items():
                 try:
-                    return self._try_parse(json.dumps(v), parser)
+                    return self._structured_parse(json.dumps(v), parser)
                 except:
                     pass
         raise ValueError(f"Couldn't parse {message_content}")
+    def _extractor_parse(self, response, return_type, parser, include_hidden: bool = False):
+        extractor = create_extractor(self.parsing_llm, tools=[return_type], tool_choice=return_type.__name__)
+        try:
+            result = extractor.invoke(
+                return_type.extractor_str(include_hidden=include_hidden) + response,
+                config={"callbacks": [MONITORING_CALLBACK, self.agent_monitoring_callback]},
+            )
+        except AttributeError as e:
+            if "tool_call_id" in str(e):
+                logger.warning(f"Trustcall bug encountered: {e}")
+                raise
+            raise
+        if "responses" in result and len(result["responses"]) != 0:
+            return return_type.model_validate(result["responses"][0])
+        if "messages" in result and len(result["messages"]) != 0:
+            message = result["messages"][0].content
+            if not message:
+                raise EmptyExtractorMessageError("Extractor returned empty message content")
+            return self._structured_parse(message, parser)
+        raise EmptyExtractorMessageError("Extractor returned no responses and no messages")

{codeboarding-0.11.0 → codeboarding-0.12.0}/agents/agent_responses.py RENAMED Viewed

@@ -7,6 +7,7 @@ from pathlib import PurePosixPath
 from typing import get_origin, Optional
 from pydantic import BaseModel, Field
+from pydantic.fields import FieldInfo
 logger = logging.getLogger(__name__)
@@ -19,30 +20,104 @@ class LLMBaseModel(BaseModel, abc.ABC):
         raise NotImplementedError("LLM String has to be implemented.")
     @classmethod
-    def extractor_str(cls):
-        # Here iterate over the fields that we have and use their description like:
-        result_str = "please extract the following: "
+    def _is_field_hidden(cls, fvalue: FieldInfo) -> bool:
+        if fvalue.exclude:
+            return True
+        extra = fvalue.json_schema_extra
+        if isinstance(extra, dict):
+            return bool(extra.get("hidden"))
+        return False
+    @classmethod
+    def _excluded_fields(cls, include_hidden: bool = False) -> set[str]:
+        if include_hidden:
+            return set()
+        names: set[str] = set()
+        for klass in cls.__mro__:
+            if hasattr(klass, "model_fields"):
+                for fname, finfo in klass.model_fields.items():
+                    if cls._is_field_hidden(finfo):
+                        names.add(fname)
+        return names
+    @classmethod
+    def _resolve_excluded_by_title(cls, include_hidden: bool = False) -> dict[str, set[str]]:
+        seen: set[type] = set()
+        result: dict[str, set[str]] = {}
+        def walk(model: type) -> None:
+            if model in seen or not hasattr(model, "model_fields"):
+                return
+            seen.add(model)
+            title = getattr(model, "__name__", "")
+            excluded = model._excluded_fields(include_hidden)  # type: ignore[attr-defined]
+            if excluded:
+                result[title] = excluded
+            for finfo in getattr(model, "model_fields", {}).values():
+                ann = finfo.annotation
+                for candidate in getattr(ann, "__args__", [ann]):
+                    if isinstance(candidate, type) and issubclass(candidate, LLMBaseModel):
+                        walk(candidate)  # type: ignore[arg-type]
+        walk(cls)
+        return result
+    @classmethod
+    def _extractor_fields(cls, indent: str = "  ", include_hidden: bool = False) -> str:
+        parts: list[str] = []
         for fname, fvalue in cls.model_fields.items():
-            if getattr(fvalue, "exclude", False):
+            if cls._is_field_hidden(fvalue) and not include_hidden:
                 continue
-            # check if the field type is Optional
             ftype = fvalue.annotation
-            # Check if the type is a typing.List (e.g., typing.List[SomeType])
             if get_origin(ftype) is list:
-                # get the type of the list:
                 if ftype is not None and hasattr(ftype, "__args__"):
-                    ftype = ftype.__args__[0]
-                result_str += f"{fname} which is a list ("
-            if ftype is Optional:
-                result_str += f"{fname} ({fvalue.description}), "
-            elif ftype is not None and isinstance(ftype, type) and issubclass(ftype, LLMBaseModel):
-                # Now I need to call the extractor_str method of the field
-                result_str += ftype.extractor_str()
+                    inner = ftype.__args__[0]
+                    if isinstance(inner, type) and issubclass(inner, LLMBaseModel):
+                        parts.append(
+                            f"{indent}- {fname}: a list, where each item has:\n{inner._extractor_fields(indent + '  ', include_hidden)}"
+                        )
+                        continue
+                parts.append(f"{indent}- {fname}: {fvalue.description}")
+            elif isinstance(ftype, type) and issubclass(ftype, LLMBaseModel):
+                parts.append(ftype._extractor_fields(indent, include_hidden))
             else:
-                result_str += f"{fname} ({fvalue.description}), "
-            if get_origin(ftype) is list:
-                result_str += "), "
-        return result_str
+                parts.append(f"{indent}- {fname}: {fvalue.description}")
+        return "\n".join(parts)
+    @classmethod
+    def extractor_str(cls, include_hidden: bool = False) -> str:
+        title = cls.__name__
+        fields = cls._extractor_fields(include_hidden=include_hidden)
+        return (
+            f"You are a JSON extraction expert. "
+            f"Extract a valid JSON object of type `{title}` from the text below.\n"
+            f"The JSON must have these fields:\n{fields}\n\n"
+        )
+    @classmethod
+    def model_json_schema(
+        cls,
+        by_alias: bool = True,
+        ref_template: str = "#/$defs/{model}",
+        schema_generator: type | None = None,
+        mode: str = "validation",
+        include_hidden: bool = False,
+        **kwargs,
+    ) -> dict:
+        call_kwargs: dict = {"by_alias": by_alias, "ref_template": ref_template, "mode": mode}
+        if schema_generator is not None:
+            call_kwargs["schema_generator"] = schema_generator
+        call_kwargs.update(kwargs)
+        schema = super().model_json_schema(**call_kwargs)
+        excluded_by_title = cls._resolve_excluded_by_title(include_hidden)
+        for title, excluded in excluded_by_title.items():
+            defn = schema.get("$defs", {}).get(title)
+            if isinstance(defn, dict) and "properties" in defn:
+                defn["properties"] = {k: v for k, v in defn["properties"].items() if k not in excluded}
+        own_excluded = cls._excluded_fields(include_hidden)
+        if "properties" in schema:
+            schema["properties"] = {k: v for k, v in schema["properties"].items() if k not in own_excluded}
+        return schema
 class SourceCodeReference(LLMBaseModel):
@@ -114,6 +189,39 @@ class ClustersComponent(LLMBaseModel):
     description: str = Field(
         description="Explanation of what this component does, its main flow, WHY these clusters are grouped together, how it interacts with other cluster groups, and the most important classes/methods (by their exact qualified names from the clusters)"
     )
+    existing_component_id: str | None = Field(
+        default=None,
+        description=(
+            "Incremental routing: the exact component_id of the existing component "
+            "this entry is routing clusters into (e.g. '1.3'). Set to null to create "
+            "a brand-new component. Identity is by ID, not name — leaving this null "
+            "while reusing an existing component's name forks a duplicate component. "
+            "Ignored by the full-analysis flow."
+        ),
+        json_schema_extra={"hidden": True},
+    )
+    parent_id: str | None = Field(
+        default=None,
+        description=(
+            "Incremental routing: when ``existing_component_id`` is null (brand-new "
+            "component), the existing component_id under which the new component "
+            "should attach (or null to attach at root). Ignored when "
+            "``existing_component_id`` is set, and ignored by the full-analysis flow."
+        ),
+        json_schema_extra={"hidden": True},
+    )
+    redetail_needed: bool = Field(
+        default=True,
+        description=(
+            "Incremental routing only: when routing clusters into an existing component "
+            "(``existing_component_id`` is set), set False if the cluster delta is "
+            "cosmetic (refactor, internal rename, small bug fix) and the component's "
+            "high-level purpose is unchanged — the existing description stays. Default "
+            "True forces a full redetail. Ignored for brand-new components (always "
+            "redetailed) and by the full-analysis flow."
+        ),
+        json_schema_extra={"hidden": True},
+    )
     def llm_str(self):
         ids_str = ", ".join(str(cid) for cid in self.cluster_ids)
@@ -151,15 +259,6 @@ class MethodEntry(BaseModel):
             return NotImplemented
         return self.qualified_name == other.qualified_name
-    @classmethod
-    def from_method_change(cls, method_change) -> MethodEntry:
-        return cls(
-            qualified_name=method_change.qualified_name,
-            start_line=method_change.start_line,
-            end_line=method_change.end_line,
-            node_type=method_change.node_type,
-        )
     @classmethod
     def from_node(cls, node) -> MethodEntry:
         """Build from a ``static_analyzer.Node``. Accepts ``Any`` to avoid a hard dep."""
@@ -210,18 +309,21 @@ class Component(LLMBaseModel):
         description="List of cluster IDs from CFG analysis that this component encompasses (populated deterministically from source_group_names).",
         default_factory=list,
         exclude=True,
+        json_schema_extra={"hidden": True},
     )
     file_methods: list[FileMethodGroup] = Field(
         description="All methods/functions belonging to this component, grouped by file (populated deterministically from cluster results).",
         default_factory=list,
         exclude=True,
+        json_schema_extra={"hidden": True},
     )
     component_id: str = Field(
         default="",
         description="Deterministic unique identifier for this component.",
         exclude=True,
+        json_schema_extra={"hidden": True},
     )
     def llm_str(self):
@@ -247,6 +349,7 @@ class AnalysisInsights(LLMBaseModel):
         default_factory=dict,
         description="Top-level file index keyed by relative file path. Contains all methods and statuses.",
         exclude=True,
+        json_schema_extra={"hidden": True},
     )
     components: list[Component] = Field(description="List of the components identified in the project.")
     components_relations: list[Relation] = Field(description="List of relations among the components.")
@@ -264,7 +367,7 @@ class AnalysisInsights(LLMBaseModel):
         return {str(PurePosixPath(fg.file_path)): c.component_id for c in self.components for fg in c.file_methods}
-def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "") -> None:
+def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "", only_new: bool = False) -> None:
     """Assign hierarchical component IDs based on sibling index.
     IDs encode structural position in the component tree:
@@ -272,11 +375,28 @@ def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "") -> Non
     - Under "1" (parent_id="1"): "1.1", "1.2"
     - Under "1.2" (parent_id="1.2"): "1.2.1", "1.2.2"
-    These IDs serve as both component identifiers and cluster IDs,
-    enabling hierarchical relationship generalization.
+    With ``only_new=True`` (incremental path), components that already carry a
+    populated ``component_id`` are preserved verbatim and only siblings with an
+    empty id are assigned a fresh slot — used when stitching new components into
+    an existing tree without renumbering survivors.
     """
-    for idx, component in enumerate(analysis.components, start=1):
-        component.component_id = f"{parent_id}.{idx}" if parent_id else str(idx)
+    if only_new:
+        used_indices: set[int] = set()
+        for component in analysis.components:
+            if not component.component_id:
+                continue
+            tail = component.component_id.split(".")[-1]
+            if tail.isdigit():
+                used_indices.add(int(tail))
+        next_idx = max(used_indices, default=0) + 1
+        for component in analysis.components:
+            if component.component_id:
+                continue
+            component.component_id = f"{parent_id}.{next_idx}" if parent_id else str(next_idx)
+            next_idx += 1
+    else:
+        for idx, component in enumerate(analysis.components, start=1):
+            component.component_id = f"{parent_id}.{idx}" if parent_id else str(idx)
     # Assign relation IDs by looking up component names (first occurrence wins for duplicates)
     name_to_id: dict[str, str] = {}
@@ -293,6 +413,29 @@ def assign_component_ids(analysis: AnalysisInsights, parent_id: str = "") -> Non
         relation.dst_id = name_to_id.get(relation.dst_name, "")
+def iter_components(
+    root_analysis: AnalysisInsights,
+    sub_analyses: dict[str, AnalysisInsights],
+) -> list[Component]:
+    """Return every component across the root and all sub-analyses, in tree order."""
+    components = list(root_analysis.components)
+    for sub in sub_analyses.values():
+        components.extend(sub.components)
+    return components
+def index_components_by_id(
+    root_analysis: AnalysisInsights,
+    sub_analyses: dict[str, AnalysisInsights],
+) -> dict[str, Component]:
+    """Build a ``component_id -> Component`` lookup across the full tree.
+    Components without a ``component_id`` are skipped. Later occurrences of
+    the same id silently override earlier ones (sub-analyses win over root).
+    """
+    return {c.component_id: c for c in iter_components(root_analysis, sub_analyses) if c.component_id}
 class CFGComponent(LLMBaseModel):
     """A component derived from control flow graph analysis."""
@@ -416,6 +559,17 @@ class ComponentFiles(LLMBaseModel):
         return title + body
+class ScopeRelations(LLMBaseModel):
+    """Relations between components within a single scope."""
+    components_relations: list[Relation] = Field(description="Inter-component relationships within this scope.")
+    def llm_str(self):
+        if not self.components_relations:
+            return "No relations found."
+        return "\n".join(r.llm_str() for r in self.components_relations)
 class FilePath(LLMBaseModel):
     """File path with optional line range reference."""

{codeboarding-0.11.0 → codeboarding-0.12.0}/agents/cluster_methods_mixin.py RENAMED Viewed

@@ -32,7 +32,7 @@ from static_analyzer.cluster_relations import (
     build_node_to_component_map,
     merge_relations,
 )
-from static_analyzer.constants import CALLABLE_TYPES, CLASS_TYPES, NodeType
+from static_analyzer.constants import CALLABLE_TYPES, CLASS_TYPES, Language, NodeType
 from static_analyzer.graph import CallGraph, ClusterResult
 from static_analyzer.node import Node
@@ -70,7 +70,7 @@ class ClusterMethodsMixin:
     def _build_cluster_string(
         self,
-        programming_langs: list[str],
+        programming_langs: list[Language],
         cluster_results: dict[str, ClusterResult],
         cluster_ids: set[int] | None = None,
         prompt_overhead_chars: int = 0,
@@ -110,7 +110,7 @@ class ClusterMethodsMixin:
     def _render_cluster_string(
         self,
-        programming_langs: list[str],
+        programming_langs: list[Language],
         cluster_results: dict[str, ClusterResult],
         cluster_ids: set[int] | None,
         skip_sets: dict[str, set[str]],
@@ -146,7 +146,7 @@ class ClusterMethodsMixin:
     def _plan_skip_sets(
         self,
-        programming_langs: list[str],
+        programming_langs: list[Language],
         cluster_results: dict[str, ClusterResult],
         prompt_overhead_chars: int,
     ) -> dict[str, set[str]]:
@@ -472,7 +472,9 @@ class ClusterMethodsMixin:
         """
         all_nodes: dict[str, Node] = {}
         for lang in cluster_results:
-            cfg = cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(lang)
+            cfg = (
+                cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(Language(lang))
+            )
             all_nodes.update(cfg.nodes)
         return all_nodes
@@ -492,7 +494,9 @@ class ClusterMethodsMixin:
         """
         graphs: dict[str, nx.Graph] = {}
         for lang in cluster_results:
-            cfg = cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(lang)
+            cfg = (
+                cfg_graphs[lang] if cfg_graphs and lang in cfg_graphs else self.static_analysis.get_cfg(Language(lang))
+            )
             graphs[lang] = cfg.to_networkx().to_undirected()
         return graphs
@@ -703,7 +707,7 @@ class ClusterMethodsMixin:
         pct = (assigned_nodes / total_nodes * 100) if total_nodes else 0
         logger.info(f"Node coverage: {assigned_nodes}/{total_nodes} ({pct:.1f}%) nodes assigned to components")
-    def _build_files_index(self, analysis: AnalysisInsights) -> dict[str, FileEntry]:
+    def build_files_index(self, analysis: AnalysisInsights) -> dict[str, FileEntry]:
         files: dict[str, FileEntry] = {}
         for component in analysis.components:
             for fmg in component.file_methods:
@@ -761,7 +765,7 @@ class ClusterMethodsMixin:
         for comp in analysis.components:
             comp.file_methods = self._build_file_methods_from_nodes(component_nodes.get(comp.component_id, []))
-        analysis.files = self._build_files_index(analysis)
+        analysis.files = self.build_files_index(analysis)
         self._log_node_coverage(analysis, len(all_nodes))
@@ -784,3 +788,44 @@ class ClusterMethodsMixin:
         node_to_component = build_node_to_component_map(analysis)
         static_relations = build_component_relations(node_to_component, cfg_graphs)
         analysis.components_relations = merge_relations(analysis.components_relations, static_relations, analysis)
+    def build_scope_cfg_string(self, analysis: AnalysisInsights) -> str:
+        """Render cross-component communication edges as a human-readable string for the LLM.
+        For every CFG edge where src belongs to component A and dst belongs to
+        component B (A != B), this produces a grouped summary like:
+            ComponentA -> ComponentB (3 edges):
+              src_pkg.MethodX -> dst_pkg.MethodY
+              src_pkg.MethodZ -> dst_pkg.MethodW
+        """
+        node_to_component = build_node_to_component_map(analysis)
+        id_to_name = {c.component_id: c.name for c in analysis.components}
+        cfg_graphs = {lang: self.static_analysis.get_cfg(lang) for lang in self.static_analysis.get_languages()}
+        cross_edges: dict[tuple[str, str], list[tuple[str, str]]] = defaultdict(list)
+        for cfg in cfg_graphs.values():
+            for edge in cfg.edges:
+                src_name = edge.get_source()
+                dst_name = edge.get_destination()
+                src_comp = node_to_component.get(src_name)
+                dst_comp = node_to_component.get(dst_name)
+                if src_comp and dst_comp and src_comp != dst_comp:
+                    cross_edges[(src_comp, dst_comp)].append((src_name, dst_name))
+        if not cross_edges:
+            return "No cross-component communication edges found."
+        lines: list[str] = []
+        for (src_id, dst_id), edges in sorted(cross_edges.items()):
+            src_label = id_to_name.get(src_id, src_id)
+            dst_label = id_to_name.get(dst_id, dst_id)
+            lines.append(f"\n{src_label} -> {dst_label} ({len(edges)} edge{'s' if len(edges) != 1 else ''}):")
+            for s, d in edges[:10]:
+                short_s = s.split(".")[-1]
+                short_d = d.split(".")[-1]
+                lines.append(f"  {short_s} -> {short_d}")
+            if len(edges) > 10:
+                lines.append(f"  ... and {len(edges) - 10} more")
+        return "\n".join(lines)

codeboarding 0.11.0__tar.gz → 0.12.0__tar.gz

codeboarding 0.11.0__tar.gz → 0.12.0__tar.gz