openhands-sdk 1.8.2__py3-none-any.whl → 1.9.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- openhands/sdk/agent/agent.py +64 -0
- openhands/sdk/agent/base.py +22 -10
- openhands/sdk/context/skills/skill.py +59 -1
- openhands/sdk/context/skills/utils.py +6 -65
- openhands/sdk/conversation/base.py +5 -0
- openhands/sdk/conversation/impl/remote_conversation.py +16 -3
- openhands/sdk/conversation/visualizer/base.py +23 -0
- openhands/sdk/critic/__init__.py +4 -1
- openhands/sdk/critic/base.py +17 -20
- openhands/sdk/critic/impl/__init__.py +2 -0
- openhands/sdk/critic/impl/agent_finished.py +9 -5
- openhands/sdk/critic/impl/api/__init__.py +18 -0
- openhands/sdk/critic/impl/api/chat_template.py +232 -0
- openhands/sdk/critic/impl/api/client.py +313 -0
- openhands/sdk/critic/impl/api/critic.py +90 -0
- openhands/sdk/critic/impl/api/taxonomy.py +180 -0
- openhands/sdk/critic/result.py +148 -0
- openhands/sdk/event/llm_convertible/action.py +10 -0
- openhands/sdk/event/llm_convertible/message.py +10 -0
- openhands/sdk/git/cached_repo.py +459 -0
- openhands/sdk/git/utils.py +118 -3
- openhands/sdk/hooks/__init__.py +7 -1
- openhands/sdk/hooks/config.py +154 -45
- openhands/sdk/llm/utils/model_features.py +3 -0
- openhands/sdk/plugin/__init__.py +17 -0
- openhands/sdk/plugin/fetch.py +231 -0
- openhands/sdk/plugin/plugin.py +61 -4
- openhands/sdk/plugin/types.py +394 -1
- {openhands_sdk-1.8.2.dist-info → openhands_sdk-1.9.1.dist-info}/METADATA +5 -1
- {openhands_sdk-1.8.2.dist-info → openhands_sdk-1.9.1.dist-info}/RECORD +32 -24
- {openhands_sdk-1.8.2.dist-info → openhands_sdk-1.9.1.dist-info}/WHEEL +1 -1
- {openhands_sdk-1.8.2.dist-info → openhands_sdk-1.9.1.dist-info}/top_level.txt +0 -0
openhands/sdk/critic/impl/api/taxonomy.py (new file)

@@ -0,0 +1,180 @@
+"""Critic taxonomy - mapping of features to categories for visualization."""
+
+import math
+from typing import Any
+
+
+# Feature to category mapping
+FEATURE_CATEGORIES: dict[str, str] = {
+    # General Context & Task Classification
+    "user_goal_summary": "general_context",
+    "overall_sentiment": "general_context",
+    # Agent Behavioral Issues
+    "misunderstood_intention": "agent_behavioral_issues",
+    "did_not_follow_instruction": "agent_behavioral_issues",
+    "insufficient_analysis": "agent_behavioral_issues",
+    "insufficient_clarification": "agent_behavioral_issues",
+    "improper_tool_use_or_setup": "agent_behavioral_issues",
+    "loop_behavior": "agent_behavioral_issues",
+    "insufficient_testing": "agent_behavioral_issues",
+    "insufficient_debugging": "agent_behavioral_issues",
+    "incomplete_implementation": "agent_behavioral_issues",
+    "file_management_errors": "agent_behavioral_issues",
+    "scope_creep": "agent_behavioral_issues",
+    "risky_actions_or_permission": "agent_behavioral_issues",
+    "other_agent_issue": "agent_behavioral_issues",
+    # User Follow-Up Patterns
+    "follow_up_timing": "user_followup_patterns",
+    "clarification_or_restatement": "user_followup_patterns",
+    "correction": "user_followup_patterns",
+    "direction_change": "user_followup_patterns",
+    "vcs_update_requests": "user_followup_patterns",
+    "progress_or_scope_concern": "user_followup_patterns",
+    "frustration_or_complaint": "user_followup_patterns",
+    "removal_or_reversion_request": "user_followup_patterns",
+    "other_user_issue": "user_followup_patterns",
+    # Infrastructure Issues
+    "infrastructure_external_issue": "infrastructure_issues",
+    "infrastructure_agent_caused_issue": "infrastructure_issues",
+}
+
+# Category display names for visualization
+CATEGORY_DISPLAY_NAMES: dict[str, str] = {
+    "general_context": "General Context",
+    "agent_behavioral_issues": "Detected Agent Behavioral Issues",
+    "user_followup_patterns": "Predicted User Follow-Up Patterns",
+    "infrastructure_issues": "Detected Infrastructure Issues",
+}
+
+
+def get_category(feature_name: str) -> str | None:
+    """Get the category for a feature.
+
+    Args:
+        feature_name: Name of the feature
+
+    Returns:
+        Category name or None if not found
+    """
+    return FEATURE_CATEGORIES.get(feature_name)
+
+
+def _softmax_normalize(probs: dict[str, float]) -> dict[str, float]:
+    """Apply softmax normalization to convert logits to probabilities.
+
+    Args:
+        probs: Dictionary of names to raw probability/logit values
+
+    Returns:
+        Dictionary with softmax-normalized probabilities that sum to 1.0
+    """
+    if not probs:
+        return {}
+
+    values = list(probs.values())
+    exp_values = [math.exp(v) for v in values]
+    exp_sum = sum(exp_values)
+    normalized = [exp_v / exp_sum for exp_v in exp_values]
+
+    return dict(zip(probs.keys(), normalized))
+
+
+def categorize_features(
+    probs_dict: dict[str, float],
+    display_threshold: float = 0.2,
+) -> dict[str, Any]:
+    """Categorize features from probability dictionary into taxonomy groups.
+
+    This function takes raw probability outputs from the critic model and
+    organizes them into categories ready for visualization.
+
+    Args:
+        probs_dict: Dictionary of feature names to probability values
+        display_threshold: Minimum probability to include a feature (default: 0.2)
+
+    Returns:
+        Dictionary with categorized features ready for visualization:
+        {
+            "sentiment": {
+                "predicted": "Neutral",
+                "probability": 0.77,
+                "all": {"positive": 0.10, "neutral": 0.77, "negative": 0.13}
+            },
+            "agent_behavioral_issues": [
+                {"name": "loop_behavior", "display_name": "Loop Behavior",
+                 "probability": 0.85},
+                ...
+            ],
+            "user_followup_patterns": [...],
+            "infrastructure_issues": [...],
+            "other": [...]
+        }
+    """
+    result: dict[str, Any] = {
+        "sentiment": None,
+        "agent_behavioral_issues": [],
+        "user_followup_patterns": [],
+        "infrastructure_issues": [],
+        "other": [],
+    }
+
+    # Extract sentiment features and apply softmax normalization
+    raw_sentiment_probs = {}
+    for feature_name, prob in probs_dict.items():
+        if feature_name.startswith("sentiment_"):
+            short_name = feature_name.replace("sentiment_", "")
+            raw_sentiment_probs[short_name] = prob
+
+    if raw_sentiment_probs:
+        # Apply softmax normalization to convert logits to probabilities
+        sentiment_probs = _softmax_normalize(raw_sentiment_probs)
+        max_sentiment = max(sentiment_probs.items(), key=lambda x: x[1])
+        result["sentiment"] = {
+            "predicted": max_sentiment[0].capitalize(),
+            "probability": max_sentiment[1],
+            "all": sentiment_probs,
+        }
+
+    # Categorize other features
+    for feature_name, prob in probs_dict.items():
+        # Skip sentiment features (already processed)
+        if feature_name.startswith("sentiment_"):
+            continue
+
+        # Skip 'success' as it's redundant with the score
+        if feature_name == "success":
+            continue
+
+        # Skip features below threshold
+        if prob < display_threshold:
+            continue
+
+        category = FEATURE_CATEGORIES.get(feature_name)
+        feature_entry = {
+            "name": feature_name,
+            "display_name": feature_name.replace("_", " ").title(),
+            "probability": prob,
+        }
+
+        if category == "general_context":
+            # Skip general context features for now
+            continue
+        elif category == "agent_behavioral_issues":
+            result["agent_behavioral_issues"].append(feature_entry)
+        elif category == "user_followup_patterns":
+            result["user_followup_patterns"].append(feature_entry)
+        elif category == "infrastructure_issues":
+            result["infrastructure_issues"].append(feature_entry)
+        else:
+            result["other"].append(feature_entry)
+
+    # Sort each category by probability (descending)
+    for key in [
+        "agent_behavioral_issues",
+        "user_followup_patterns",
+        "infrastructure_issues",
+        "other",
+    ]:
+        result[key] = sorted(result[key], key=lambda x: x["probability"], reverse=True)
+
+    return result
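For a sense of how the new categorize_features helper behaves, here is a minimal usage sketch. The import path follows the file list above; the feature names and probability values are invented for illustration.

    from openhands.sdk.critic.impl.api.taxonomy import categorize_features

    # Hypothetical raw critic outputs. "sentiment_*" values are treated as
    # logits and softmax-normalized; all other values are probabilities.
    probs = {
        "sentiment_positive": 0.2,
        "sentiment_neutral": 1.5,
        "sentiment_negative": -0.4,
        "loop_behavior": 0.85,         # kept: maps to agent_behavioral_issues
        "correction": 0.45,            # kept: maps to user_followup_patterns
        "success": 0.90,               # skipped: redundant with the score
        "insufficient_testing": 0.05,  # skipped: below display_threshold
    }

    result = categorize_features(probs, display_threshold=0.2)
    print(result["sentiment"]["predicted"])  # "Neutral" (softmax winner)
    print(result["agent_behavioral_issues"])
    # [{'name': 'loop_behavior', 'display_name': 'Loop Behavior', 'probability': 0.85}]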
openhands/sdk/critic/result.py (new file)

@@ -0,0 +1,148 @@
+from typing import Any, ClassVar
+
+from pydantic import BaseModel, Field
+from rich.text import Text
+
+
+class CriticResult(BaseModel):
+    """A critic result is a score and a message."""
+
+    THRESHOLD: ClassVar[float] = 0.5
+    DISPLAY_THRESHOLD: ClassVar[float] = 0.2  # Only show scores above this threshold
+
+    score: float = Field(
+        description="A predicted probability of success between 0 and 1.",
+        ge=0.0,
+        le=1.0,
+    )
+    message: str | None = Field(description="An optional message explaining the score.")
+    metadata: dict[str, Any] | None = Field(
+        default=None,
+        description=(
+            "Optional metadata about the critic evaluation. "
+            "Can include event_ids and categorized_features for visualization."
+        ),
+    )
+
+    @property
+    def success(self) -> bool:
+        """Whether the agent is successful."""
+        return self.score >= CriticResult.THRESHOLD
+
+    @staticmethod
+    def _get_star_rating(score: float) -> str:
+        """Convert score (0-1) to a 5-star rating string.
+
+        Each star represents 20% of the score.
+        """
+        filled_stars = round(score * 5)
+        empty_stars = 5 - filled_stars
+        return "★" * filled_stars + "☆" * empty_stars
+
+    @staticmethod
+    def _get_star_style(score: float) -> str:
+        """Get the style for the star rating based on score."""
+        if score >= 0.6:
+            return "green"
+        elif score >= 0.4:
+            return "yellow"
+        else:
+            return "red"
+
+    @property
+    def visualize(self) -> Text:
+        """Return Rich Text representation of the critic result."""
+        content = Text()
+        content.append("\n\nCritic: agent success likelihood ", style="bold")
+
+        # Display star rating with percentage
+        stars = self._get_star_rating(self.score)
+        style = self._get_star_style(self.score)
+        percentage = self.score * 100
+        content.append(stars, style=style)
+        content.append(f" ({percentage:.1f}%)", style="dim")
+
+        # Use categorized features from metadata if available
+        if self.metadata and "categorized_features" in self.metadata:
+            categorized = self.metadata["categorized_features"]
+            self._append_categorized_features(content, categorized)
+        else:
+            # Fallback: display message as-is
+            if self.message:
+                content.append(f"\n {self.message}\n")
+            else:
+                content.append("\n")
+
+        return content
+
+    def _append_categorized_features(
+        self, content: Text, categorized: dict[str, Any]
+    ) -> None:
+        """Append categorized features to content, each category on its own line."""
+        has_content = False
+
+        # Agent behavioral issues
+        agent_issues = categorized.get("agent_behavioral_issues", [])
+        if agent_issues:
+            content.append("\n ")
+            content.append("Potential Issues: ", style="bold")
+            self._append_feature_list_inline(content, agent_issues)
+            has_content = True
+
+        # User follow-up patterns
+        user_patterns = categorized.get("user_followup_patterns", [])
+        if user_patterns:
+            content.append("\n ")
+            content.append("Likely Follow-up: ", style="bold")
+            self._append_feature_list_inline(content, user_patterns)
+            has_content = True
+
+        # Infrastructure issues
+        infra_issues = categorized.get("infrastructure_issues", [])
+        if infra_issues:
+            content.append("\n ")
+            content.append("Infrastructure: ", style="bold")
+            self._append_feature_list_inline(content, infra_issues)
+            has_content = True
+
+        # Other metrics
+        other = categorized.get("other", [])
+        if other:
+            content.append("\n ")
+            content.append("Other: ", style="bold")
+            self._append_feature_list_inline(content, other, is_other=True)
+            has_content = True
+
+        if not has_content:
+            content.append("\n")
+        else:
+            content.append("\n")
+
+    def _append_feature_list_inline(
+        self,
+        content: Text,
+        features: list[dict[str, Any]],
+        is_other: bool = False,
+    ) -> None:
+        """Append features inline with likelihood percentages."""
+        for i, feature in enumerate(features):
+            display_name = feature.get("display_name", feature.get("name", "Unknown"))
+            prob = feature.get("probability", 0.0)
+            percentage = prob * 100
+
+            # Get style based on probability
+            if is_other:
+                prob_style = "white"
+            elif prob >= 0.7:
+                prob_style = "red bold"
+            elif prob >= 0.5:
+                prob_style = "yellow"
+            else:
+                prob_style = "dim"
+
+            # Add dot separator between features
+            if i > 0:
+                content.append(" · ", style="dim")
+
+            content.append(f"{display_name}", style="white")
+            content.append(f" (likelihood {percentage:.0f}%)", style=prob_style)
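The score-to-stars mapping in CriticResult is easy to sanity-check by hand; a small sketch (values invented, _get_star_rating is the private helper from the file above):

    from openhands.sdk.critic.result import CriticResult

    res = CriticResult(score=0.77, message="Task likely completed")
    print(res.success)  # True: 0.77 >= CriticResult.THRESHOLD (0.5)

    # Each star covers 20% of the score; round(0.77 * 5) == 4 filled stars.
    print(CriticResult._get_star_rating(res.score))  # "★★★★☆"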
openhands/sdk/event/llm_convertible/action.py

@@ -3,6 +3,7 @@ from collections.abc import Sequence
 from pydantic import Field
 from rich.text import Text
 
+from openhands.sdk.critic.result import CriticResult
 from openhands.sdk.event.base import N_CHAR_PREVIEW, EventID, LLMConvertibleEvent
 from openhands.sdk.event.types import SourceType, ToolCallID
 from openhands.sdk.llm import (
@@ -65,6 +66,11 @@ class ActionEvent(LLMConvertibleEvent):
         description="The LLM's assessment of the safety risk of this action.",
     )
 
+    critic_result: CriticResult | None = Field(
+        default=None,
+        description="Optional critic evaluation of this action and preceding history.",
+    )
+
     summary: str | None = Field(
         default=None,
         description=(
@@ -125,6 +131,10 @@ class ActionEvent(LLMConvertibleEvent):
         content.append("Function call:\n", style="bold")
         content.append(f"- {self.tool_call.name} ({self.tool_call.id})\n")
 
+        # Display critic result if available
+        if self.critic_result is not None:
+            content.append(self.critic_result.visualize)
+
         return content
 
     def to_llm_message(self) -> Message:
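The message.py hunks below repeat this exact pattern. Stripped of the SDK's event machinery, what is being added is just an optional field plus a rendering hook; a standalone sketch (EventSketch is illustrative, not an SDK class):

    from pydantic import BaseModel, Field
    from rich.text import Text

    from openhands.sdk.critic.result import CriticResult


    class EventSketch(BaseModel):
        """Illustrative stand-in for ActionEvent/MessageEvent."""

        text: str
        critic_result: CriticResult | None = Field(default=None)

        @property
        def visualize(self) -> Text:
            content = Text(self.text)
            # Same hook the diff adds: append the critic rendering if present.
            if self.critic_result is not None:
                content.append(self.critic_result.visualize)
            return content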
openhands/sdk/event/llm_convertible/message.py

@@ -5,6 +5,7 @@ from typing import ClassVar
 from pydantic import ConfigDict, Field
 from rich.text import Text
 
+from openhands.sdk.critic.result import CriticResult
 from openhands.sdk.event.base import N_CHAR_PREVIEW, EventID, LLMConvertibleEvent
 from openhands.sdk.event.types import SourceType
 from openhands.sdk.llm import (
@@ -51,6 +52,11 @@ class MessageEvent(LLMConvertibleEvent):
         ),
     )
 
+    critic_result: CriticResult | None = Field(
+        default=None,
+        description="Optional critic evaluation of this message and preceding history.",
+    )
+
     @property
     def reasoning_content(self) -> str:
         return self.llm_message.reasoning_content or ""
@@ -101,6 +107,10 @@ class MessageEvent(LLMConvertibleEvent):
         )
         content.append(" ".join(text_parts))
 
+        # Display critic result if available
+        if self.critic_result is not None:
+            content.append(self.critic_result.visualize)
+
         return content
 
     def to_llm_message(self) -> Message:
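Taken together, the pieces suggest this flow: run the critic, categorize its raw feature probabilities with the taxonomy helper, store them in CriticResult.metadata under the "categorized_features" key that visualize looks up, and attach the result to the triggering event. A hedged end-to-end sketch (the wiring is assumed; only the field and function names come from this diff):

    from openhands.sdk.critic.impl.api.taxonomy import categorize_features
    from openhands.sdk.critic.result import CriticResult

    raw_probs = {"success": 0.72, "loop_behavior": 0.61, "correction": 0.35}  # hypothetical
    critic_result = CriticResult(
        score=raw_probs["success"],  # assumption: "success" doubles as the score
        message=None,
        metadata={"categorized_features": categorize_features(raw_probs)},
    )
    # event.critic_result = critic_result  # on an ActionEvent or MessageEvent
    # critic_result.visualize              # renders stars plus categorized features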