puli-plg 0.1.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
@@ -0,0 +1,13 @@
+ {
+   "type": "service_account",
+   "project_id": "totemic-formula-484216-s6",
+   "private_key_id": "8078be8f617d2c0c2c6e4cc7c94132135664c3c0",
+   "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDNQfmPOGwiNCNu\njztooUcrhOG6upIdA8BZxFg7UJKa4UrugXowoUZyyu1E3JzMr7kNRZMa3perDDl6\nP04HtA8mtqdBEnJeNIQlwV0FiGHuPuzEQBpKBjz2LCPOHaG0m5tQZYPngynzHdF6\nGLgpMaYpnPZGsqpRi6F44zI2+wj4MsGyGuA+pukeMiS1fAhjMUDhHDpN6kryLU9D\nLSx79oY04I3SE/kroGhtH5BPmv/fxpkhM8e1KzvHeZ3FKosfCskzsdaCNR0xCNSi\n+8SXaq0SJrXIkDDxQ0W3pxmfH0y17hX8tJSbsyeiCg3/+mJpsEbO0Z6colmfoJu+\nIOmuyCA/AgMBAAECggEAWMEe+u2gYHfgHFYGD9uxLSHEA9zaAn7VRVuv9VIFWsBx\niMlm/zE1h7y0tVKZ4K5ZW+JKVlOLDLT4A6LtCEq1RH21u98QoiiePb8rjJFpGz6N\nXd0EIR8qbB4xC1bnzfN9SNnknA6s7Innwx5+P53O5m6PVYm7ORCiLWY8l+ab/coZ\npY82vMrO0ohQwIbFiJjgnL2rO/sF/Z3sKR4Iw6EUbLV/oY33geGJ45hqsmVOxQ3C\nvfDxxVbot1wdGMHuVDQrrcQx7pNcODVaDwQ47rHPNmCu12L4ZAHL5UWkhNtlGV1N\nbO+zJUOVhlUKbs3Y2aOg4Z0QpRSFp9k9h9I6P24NwQKBgQD8rTbExPCJYDpV6R63\ndY1cbQ6+L2kKLHFyoH7XuPZfa/Z6PCgL2Msdp20as1vCDRgzAjAeO69cSfzbxrxC\ngvymyLub9IegHBQjjvxQz/w/TyPS3jo/buCarv43HNhpQr8rOLbk4fqAblvcwrau\ngDIYlxA+Zqmes8IKdy49UG6VQQKBgQDP9RjTM6IrURWOu6y1MaRMCVzeRLSzLFkK\nUFHIT0NOoS9hK641Z4diKw5uz0hXZgK+T2//AKcYD5utA68BITBHNmH0AwIxxtmr\nbutCDK+Fg5iUuoEXESD/tvDk1caTMCsFW8XuFcbPjlGmNoAQcHglWDWQcv4MgWG4\nFKMhpX3VfwKBgQDQKUcQjgp3sm38rsPechqWRUY7CkXn2rtPqsc0oy1daU0yYHLB\nZ8XV1UO+FnsGf9Eq3KeVkKgkSNPmn3Ai/1RzdrWQgsBk+BzjOn3FecMeyO0DYI7u\nCUNCS94kuz/SX+msCtop7712pvRJB6SGWBhtR65bKiqdEwxNoOfYHXYswQKBgGKz\nQIoVSpXmkSXCe3EwU3tnQjWYRG95z/TuNDKvNBHgvL0Q30knjwEP0HTVWs9ockrj\nJxPPun7YAZo3Ultl5iUWcZ0/xE3dxDfymCQsIy5qTAPjFRvenFvh9ymVMP9w8CpF\n6YZbvZWLRcNuwI+M4elPmXSqr1s+GSQmX1q70iJ/AoGARTjZDvA3wV4Y6+lAhydV\niLtkCm99Bs5bn/Rqpg2dJpX6pnZIWJerDczFK6I3hdl2YZxXx0pvsKL61MPhrqMS\nL99SXviX0709DG48kytIrnJkBvZMTk2WQVWMmswzbpDBa9/KTheKkMJuSLu+VtxU\nghBOZjGr/hILrSuUeQ9eC68=\n-----END PRIVATE KEY-----\n",
+   "client_email": "puli-mcp-client@totemic-formula-484216-s6.iam.gserviceaccount.com",
+   "client_id": "115504467731897363185",
+   "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+   "token_uri": "https://oauth2.googleapis.com/token",
+   "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+   "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/puli-mcp-client%40totemic-formula-484216-s6.iam.gserviceaccount.com",
+   "universe_domain": "googleapis.com"
+ }
File without changes
@@ -0,0 +1,34 @@
+ from typing import List, Optional
+ from openai import OpenAI
+ from .config import EmbeddingConfig
+
+
+ class EmbeddingClient:
+     """Client for generating text embeddings using OpenAI."""
+
+     def __init__(self, config: Optional[EmbeddingConfig] = None):
+         """
+         Initialize the embedding client.
+
+         Args:
+             config: EmbeddingConfig instance. If None, loads from environment variables.
+         """
+         self.config = config or EmbeddingConfig.from_env()
+         self.client = OpenAI(api_key=self.config.api_key)
+
+     def generate_embedding(self, text: str) -> List[float]:
+         """
+         Generate an embedding for the given text.
+
+         Args:
+             text: The text to embed.
+
+         Returns:
+             A list of floats representing the embedding vector.
+         """
+         # Replace newlines with spaces before embedding
+         clean_text = text.replace("\n", " ")
+         return self.client.embeddings.create(
+             input=[clean_text],
+             model=self.config.model
+         ).data[0].embedding
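
A minimal usage sketch for the client above, assuming OPENAI_API_KEY is set; the absolute import path is inferred from the sibling files in this diff, not confirmed by it:

    # Hypothetical import path; only the relative layout is visible in the diff.
    from puli_mcp_server.embedding.client import EmbeddingClient

    client = EmbeddingClient()  # falls back to EmbeddingConfig.from_env()
    vector = client.generate_embedding("retry loop added to checkout worker")
    print(len(vector))  # text-embedding-3-large returns 3072-dimensional vectors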
@@ -0,0 +1,49 @@
+ import os
+ from dataclasses import dataclass
+ from typing import Dict, Any
+
+ EMBEDDING_ALGORITHM = "text-embedding-3-large"
+
+
+ @dataclass
+ class EmbeddingConfig:
+     """Configuration for the embedding client, loaded from environment variables."""
+
+     api_key: str
+     model: str
+
+     @classmethod
+     def from_env(cls) -> "EmbeddingConfig":
+         """Load configuration from environment variables."""
+         api_key = os.environ.get("OPENAI_API_KEY")
+         if not api_key:
+             raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable.")
+
+         model = os.environ.get("EMBEDDING_ALGORITHM", EMBEDDING_ALGORITHM)
+
+         return cls(
+             api_key=api_key,
+             model=model,
+         )
+
+     @classmethod
+     def from_remote(cls, config: Dict[str, Any]) -> "EmbeddingConfig":
+         """Load configuration from a remote config dict (fetched from the proxy).
+
+         Args:
+             config: Configuration dictionary from the proxy /config/mcp endpoint
+
+         Returns:
+             EmbeddingConfig instance with settings from the remote config
+         """
+         api_key = os.environ.get("OPENAI_API_KEY")
+         if not api_key:
+             raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable.")
+
+         model = config.get("EMBEDDING_ALGORITHM", EMBEDDING_ALGORITHM)
+
+         return cls(
+             api_key=api_key,
+             model=model,
+         )
+
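
For illustration, the two constructors differ only in where the model name is read from; a short sketch (the remote dict shape is assumed from the .get() call above):

    import os
    os.environ["OPENAI_API_KEY"] = "sk-placeholder"  # placeholder for the sketch

    # Environment-based: model falls back to text-embedding-3-large
    env_cfg = EmbeddingConfig.from_env()

    # Remote-based: same fallback when the key is absent from the proxy payload
    remote_cfg = EmbeddingConfig.from_remote({"EMBEDDING_ALGORITHM": "text-embedding-3-small"})
    assert remote_cfg.model == "text-embedding-3-small"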
File without changes
@@ -0,0 +1,85 @@
+ import os
+ import yaml
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Dict, Any
+
+
+ LLM_PROVIDER = "openai"
+ LLM_MODEL = "gpt-4o"
+ LLM_TEMPERATURE = 0.7
+
+ # Resolve relative to project root: config.py → llm_agent → puli_mcp_server → src → project root
+ _PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent.parent
+ PROMPT_FILE_PATH = _PROJECT_ROOT / "prompts" / "analyze_code_prompt.yaml"
+
+
+ @dataclass
+ class LLMAgentConfig:
+     """Configuration for the LLM agent, loaded from environment variables."""
+
+     provider: str
+     model: str
+     temperature: float
+     system_prompt: str
+
+     @classmethod
+     def from_env(cls) -> "LLMAgentConfig":
+         """Load configuration from environment variables and the local prompt file."""
+         provider = os.environ.get("LLM_PROVIDER", LLM_PROVIDER)
+         model = os.environ.get("LLM_MODEL", LLM_MODEL)
+         temperature = float(os.environ.get("LLM_TEMPERATURE", LLM_TEMPERATURE))
+
+         # Load the prompt from the absolute path
+         try:
+             with open(PROMPT_FILE_PATH, 'r', encoding='utf-8') as f:
+                 prompt_text = f.read()
+
+             try:
+                 data = yaml.safe_load(prompt_text)
+                 system_prompt = data.get("prompt")
+             except yaml.YAMLError as e:
+                 raise ValueError(f"Error parsing YAML prompt file: {e}")
+
+             if not system_prompt:
+                 raise ValueError("System prompt 'prompt' key not found in YAML file")
+
+         except FileNotFoundError:
+             raise ValueError(f"Prompt file not found at {PROMPT_FILE_PATH}")
+         except Exception as e:
+             raise ValueError(f"Could not load prompt from {PROMPT_FILE_PATH}: {e}")
+
+         return cls(
+             provider=provider,
+             model=model,
+             temperature=temperature,
+             system_prompt=system_prompt,
+         )
+
+     @classmethod
+     def from_remote(cls, config: Dict[str, Any], prompts: Dict[str, Any]) -> "LLMAgentConfig":
+         """Load configuration from a remote config dict (fetched from the proxy).
+
+         Args:
+             config: Configuration dictionary from the proxy /config/mcp endpoint
+             prompts: Prompts dictionary from the proxy /config/mcp endpoint
+
+         Returns:
+             LLMAgentConfig instance with settings from the remote config
+         """
+         provider = config.get("LLM_PROVIDER", LLM_PROVIDER)
+         model = config.get("LLM_MODEL", LLM_MODEL)
+         temperature = config.get("LLM_TEMPERATURE", LLM_TEMPERATURE)
+
+         system_prompt = prompts.get("analyze_prompt")
+         if not system_prompt:
+             print(f"analyze_prompt not found in prompts configuration: {prompts}")
+             raise ValueError("analyze_prompt not found in prompts configuration")
+
+         return cls(
+             provider=provider,
+             model=model,
+             temperature=temperature,
+             system_prompt=system_prompt,
+         )
+
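
from_env expects prompts/analyze_code_prompt.yaml to expose a top-level prompt key; the prompt file itself is not part of this diff, so the sketch below parses an illustrative stand-in rather than the shipped prompt:

    import yaml

    # Illustrative prompt file content; the real analyze_code_prompt.yaml is not shown here.
    example_yaml = "prompt: |\n  You are a code-risk reviewer. Return a RiskAssessment.\n"
    assert yaml.safe_load(example_yaml)["prompt"].startswith("You are")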
@@ -0,0 +1,46 @@
+ from typing import Optional
+
+ from pydantic_ai import Agent
+
+ from .config import LLMAgentConfig
+ from .models import LLMQueryRequest, RiskAssessment
+
+
+ class LLMAgent:
+     """Agent for querying an LLM using pydantic-ai."""
+
+     def __init__(self, config: Optional[LLMAgentConfig] = None):
+         """
+         Initialize the LLM agent.
+
+         Args:
+             config: LLMAgentConfig instance. If None, loads from environment variables.
+         """
+         self.config = config or LLMAgentConfig.from_env()
+
+         # Build the model name string (e.g., "openai:gpt-4" or "anthropic:claude-3")
+         model_name = f"{self.config.provider}:{self.config.model}"
+
+         # Initialize the Agent with the model name string.
+         # pydantic-ai will automatically read API keys from environment variables.
+         self.agent = Agent(
+             model_name,
+             system_prompt=self.config.system_prompt,
+             output_type=RiskAssessment,
+         )
+
+     async def query(self, request: LLMQueryRequest) -> RiskAssessment:
+         """
+         Query the LLM with a ChangeSet and historical incidents.
+
+         Args:
+             request: LLMQueryRequest containing the data to analyze.
+
+         Returns:
+             The LLM response as a RiskAssessment object.
+         """
+         # Format the prompt with the request data
+         prompt = request.to_prompt_str()
+         result = await self.agent.run(prompt)
+         return result.output
+
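
A hedged end-to-end sketch of the agent; the absolute import paths are inferred from the package layout rather than confirmed by this diff, and the empty incident/pattern lists keep the example self-contained:

    import asyncio
    from puli_mcp_server.mcp_server.models import ChangeSet, FileChange
    # LLMQueryRequest import path inferred from the relative import above.
    from puli_mcp_server.llm_agent.models import LLMQueryRequest

    async def main() -> None:
        request = LLMQueryRequest(
            change_set=ChangeSet(
                goal="Fix parsing bug in date conversion",
                changes=[FileChange(file_path="src/utils/parser.py",
                                    diff_content="@@ -1 +1 @@\n-old\n+new")],
            ),
            historical_incidents=[],
            relevant_chaos_patterns=[],
        )
        agent = LLMAgent()  # requires OPENAI_API_KEY and the local prompt file
        assessment = await agent.query(request)
        print(assessment.to_str())

    asyncio.run(main())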
@@ -0,0 +1,283 @@
+ from typing import Optional, List
+ from pydantic import BaseModel, Field, conint
+ from enum import Enum
+ import sys
+ import os
+
+ from puli_mcp_server.mcp_server.models import ChangeSet
+ from puli_models import IncidentQueryResult, ChaosPatternQueryResult
+
+
+ # ANSI color codes
+ class ANSIColors:
+     """ANSI escape codes for terminal colors."""
+     RED = '\033[91m'
+     ORANGE = '\033[38;5;208m'  # Orange (256-color mode)
+     YELLOW = '\033[93m'
+     GREEN = '\033[92m'
+     MAGENTA = '\033[95m'
+     RESET = '\033[0m'
+
+
+ def _should_use_colors() -> bool:
+     """
+     Detect if ANSI colors should be used based on the environment.
+
+     Returns:
+         True if colors should be used, False otherwise.
+     """
+     # Honor the DISABLE_COLOR override (takes precedence)
+     if os.getenv("DISABLE_COLOR"):
+         return False
+
+     # Check if output is a TTY
+     if hasattr(sys.stdout, 'isatty') and sys.stdout.isatty():
+         return True
+
+     return False
+
+
+ def risk_meter(rate: int, total: int = 100, bar_length: int = 20, color: str = "") -> str:
+     """
+     Generate a risk meter progress bar.
+
+     Args:
+         rate: Current value (0-100)
+         total: Maximum value (default: 100)
+         bar_length: Number of characters in the bar (default: 20)
+         color: ANSI color code to apply to the meter (optional)
+
+     Returns:
+         String with format: ▰▰▰▰▰▰▰▰▰▰▰▰▰▰▰▱▱▱▱▱
+     """
+     # Calculate how many filled blocks
+     filled = int((rate / total) * bar_length)
+     empty = bar_length - filled
+
+     # Create the bar
+     bar = '▰' * filled + '▱' * empty
+
+     # Apply color if provided
+     if color:
+         return f"{color}{bar}{ANSIColors.RESET}"
+
+     # Return the uncolored bar
+     return bar
+
+
+ class LLMQueryRequest(BaseModel):
+     """Request object for the LLM Agent query API."""
+     change_set: ChangeSet
+     historical_incidents: List[IncidentQueryResult]
+     relevant_chaos_patterns: List[ChaosPatternQueryResult]
+
+     def to_prompt_str(self) -> str:
+         """Render the request as a single prompt string for the LLM."""
+         prompt = "Analyze the following code changes:\n"
+
+         # Change set summary
+         prompt += self.change_set.to_embedding_string()
+
+         # Add the code diffs
+         for change in self.change_set.changes:
+             prompt += f"\n{change.to_str()}"
+
+         # Add chaos patterns
+         chaos_patterns_str_list = [chaos_pattern.to_prompt_str() for chaos_pattern in self.relevant_chaos_patterns]
+         prompt += "\nChaos patterns to consider:\n"
+         for ind, chaos_pattern_str in enumerate(chaos_patterns_str_list):
+             prompt += f"\n{ind}. {chaos_pattern_str}"
+
+         # Add incidents
+         incidents_str_list = [incident.to_prompt_str() for incident in self.historical_incidents]
+         prompt += "\nRelated incidents:\n"
+         for ind, incident_str in enumerate(incidents_str_list):
+             prompt += f"\n{ind}. {incident_str}"
+
+         return prompt
+
+
+ # Icon mapping for risk assessment levels
+ _RISK_LEVEL_ICONS = {
+     "CRITICAL": "⛔",
+     "HIGH_RISK": "⚠️",
+     "MODERATE": "🔶",
+     "LOW_RISK": "✅",
+ }
+
+
+ class RiskAssessmentLevel(str, Enum):
+     CRITICAL = "CRITICAL"
+     HIGH_RISK = "HIGH_RISK"
+     MODERATE = "MODERATE"
+     LOW_RISK = "LOW_RISK"
+
+     def to_icon(self) -> str:
+         return _RISK_LEVEL_ICONS.get(self.value, "❓")
+
+
+ class TechnicalFinding(BaseModel):
+     """
+     Precise, factual, code-level details of the finding.
+     """
+     file_path: str = Field(..., description="The relative path to the file.")
+     line_number: str = Field(..., description="The specific line number or range (e.g., '47' or '47-52').")
+     change_description: str = Field(..., description="Brief summary of what changed in the code.")
+     technical_reason: str = Field(
+         ...,
+         description="Why this breaks. Must be technical and specific (e.g., 'Missing index causes table scan'). Avoid vague phrases like 'might cause issues'."
+     )
+
+ class BusinessContext(BaseModel):
+     """
+     Contextualizes the code within the broader business process.
+     Requirement: You must break the flow down into atomic steps.
+     Requirement: One step must clearly identify where the failure occurs (e.g., ["User Clicks", "API Request", "[DB DEADLOCK]", "Response Timeout"]).
+     """
+     process_description: str = Field(..., description="A description of the business process served by this code.")
+     flow_steps: List[str] = Field(
+         ...,
+         description="List of steps in the process. The point of failure should be one of the steps; add a marker on the step where the failure occurs (e.g., '[DB DEADLOCK]')."
+     )
+
+ class RealIncident(BaseModel):
+     """
+     Historical context if a similar pattern has caused a major outage before.
+     """
+     company: str = Field(..., description="The name of the company that suffered the incident.")
+     year: str = Field(..., description="The year the incident occurred.")
+     description: str = Field(..., description="Brief description of what happened in that specific incident.")
+
+ class RiskAssessment(BaseModel):
+     """
+     The main structure for the code review output.
+     """
+     risk_assessment_level: RiskAssessmentLevel = Field(
+         ...,
+         description="The level of risk associated with the code change."
+     )
+     risk_score: conint(ge=0, le=100) = Field(
+         ...,
+         description="Risk score from 0 (Critical) to 100 (Safe)."
+     )
+     business_flow_name: str = Field(..., description="Top-level name of the flow (e.g. 'Checkout Process').")
+     technical_finding: TechnicalFinding
+     business_context: BusinessContext
+     consequence: str = Field(
+         ...,
+         description="Description: What happens to the user or the business. Style Rule: Be strictly factual. No drama. No hyperbole. Example: \"User is double-charged. Support ticket generated.\" (NOT \"Catastrophic failure destroys trust\")."
+     )
+     chaos_scenario: Optional[str] = Field(
+         None,
+         description="Description of the chaos scenario that was run to test this risk assessment."
+     )
+     historical_incident: Optional[RealIncident] = Field(
+         None,
+         description="Only populate if a famous/known incident matches this exact failure pattern."
+     )
+     closing_line: str = Field(
+         ...,
+         description="A short line describing the risk assessment. No more than 15 words, typically 7 words."
+     )
+
+     def to_str(self, use_colors: Optional[bool] = None) -> str:
+         """
+         Converts the object into the specific text format required for the prompt.
+
+         Args:
+             use_colors: Whether to use ANSI colors. If None, auto-detect based on TTY.
+         """
+         # Auto-detect color support if not explicitly specified
+         if use_colors is None:
+             use_colors = _should_use_colors()
+
+         sep = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+
+         # Conditionally apply colors
+         colored_sep = sep
+         risk_color = ""
+         if use_colors:
+             colored_sep = f"{ANSIColors.MAGENTA}{sep}{ANSIColors.RESET}"
+             # Map risk levels to colors
+             risk_colors = {
+                 "CRITICAL": ANSIColors.RED,
+                 "HIGH_RISK": ANSIColors.ORANGE,
+                 "MODERATE": ANSIColors.YELLOW,
+                 "LOW_RISK": ANSIColors.GREEN,
+             }
+             risk_color = risk_colors.get(self.risk_assessment_level.value, "")
+
+         # Build the sections list
+         sections = []
+
+         # 1. Header Section
+         sections.append(colored_sep)
+         icon = self.risk_assessment_level.to_icon()
+         meter = risk_meter(100 - self.risk_score, color=risk_color)
+
+         risk_assessment_level_str = icon
+         if use_colors:
+             risk_assessment_level_str = f"{risk_color}{icon} {ANSIColors.RESET}"
+
+         sections.append(f"Puli Risk Assessment: {risk_assessment_level_str}")
+         sections.append(meter)
+         sections.append(colored_sep)
+
+         # 2. Technical Finding
+         sections.append("[TECHNICAL FINDING]")
+         tf = self.technical_finding
+         sections.append(f"File: {tf.file_path}, Line: {tf.line_number}")
+         sections.append(f"Change: {tf.change_description}")
+         sections.append(f"Why it breaks: {tf.technical_reason}\n")
+
+         # 3. Business Flow
+         sections.append("[BUSINESS FLOW]")
+         bc = self.business_context
+         sections.append(f"Process: {bc.process_description}")
+
+         # Join the flow steps with arrows
+         formatted_flow = " → ".join(bc.flow_steps)
+         sections.append(f"Flow: {formatted_flow}\n")
+
+         # 4. Consequence
+         sections.append("[CONSEQUENCE]")
+         sections.append(f"{self.consequence}\n")
+
+         # 5. Real Incident (only if present)
+         if self.historical_incident:
+             inc = self.historical_incident
+             sections.append("[REAL INCIDENT]")
+             sections.append(f"{inc.company} ({inc.year}): {inc.description}")
+             sections.append(colored_sep)
+         else:
+             sections.append(colored_sep)
+
+         # 6. Closing Line
+         sections.append(self.closing_line)
+         sections.append(colored_sep)
+
+         return "\n".join(sections)
+
+
+ """
+
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ [SEVERITY ICON] [BUSINESS FLOW NAME]
+ Score: [X]/100
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ 💀 SCENARIO 1: [Catchy 2-4 Word Name] ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+ In [file] line [N], you're [specific description of what code does].
+
+ WHAT IF: [Specific chaos injection — use actual values from their code]
+
+ THEN: [Immediate technical consequence]
+
+ BUSINESS IMPACT: [Real consequence — money, customers, reputation, legal]
+
+ THIS IS REAL: [Company] [Year] — "[Brief quote or description]"
+
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ [CLOSING LINE - based on score tier]
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ """
File without changes
@@ -0,0 +1,62 @@
+ from pydantic import BaseModel, Field
+ from typing import List, Literal
+
+
+ class FileChange(BaseModel):
+     """
+     Represents a change to a single file.
+     """
+     file_path: str = Field(
+         ...,
+         description="The full path of the file being modified (e.g., 'src/utils/parser.py')."
+     )
+     change_type: Literal["modify", "create", "delete"] = Field(
+         default="modify",
+         description="Whether this file is being edited, created new, or removed."
+     )
+     diff_content: str = Field(
+         ...,
+         description=(
+             "The standard Unified Diff of the change. "
+             "Must include '@@' headers and 3 lines of context around changes."
+         )
+     )
+
+     def to_str(self) -> str:
+         """Returns a string representation of the FileChange."""
+         return f"File: {self.file_path}\nType: {self.change_type}\nDiff:\n{self.diff_content}"
+
+
+ class ChangeSet(BaseModel):
+     """
+     A logical grouping of changes across multiple files to achieve a single goal.
+     """
+     goal: str = Field(
+         ...,
+         description="High-level description of WHY this change is happening (e.g., 'Fix parsing bug in date conversion')."
+     )
+
+     # Renamed 'Code_diffs' to 'changes' for clarity
+     changes: List[FileChange]
+
+     related_infrastructure: str | None = Field(
+         None,
+         description="A list of specific system components, external services, or resources "
+                     "that this code interacts with or affects. "
+                     "Be specific (e.g., 'PostgreSQL: Users Table', 'Redis Cache', 'AWS S3', 'Stripe API', 'Kafka'). "
+                     "Do not list generic terms like 'Backend' or 'Server'."
+     )
+
+     additional_context: str | None = Field(
+         None,
+         description="Any extra notes, ticket numbers, or constraints that are important to this area of the code and its meaning."
+     )
+
+     def to_embedding_string(self) -> str:
+         """Returns a string representation of the ChangeSet for embedding, excluding the diffs."""
+         parts = [f"Goal: {self.goal}"]
+         if self.related_infrastructure:
+             parts.append(f"Infrastructure: {self.related_infrastructure}")
+         if self.additional_context:
+             parts.append(f"Context: {self.additional_context}")
+         return "\n".join(parts)
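
A short construction sketch; to_embedding_string deliberately skips the diffs so the embedding captures intent and affected infrastructure rather than raw code:

    cs = ChangeSet(
        goal="Fix parsing bug in date conversion",
        changes=[FileChange(file_path="src/utils/parser.py",
                            diff_content="@@ -47,3 +47,3 @@\n-old\n+new")],
        related_infrastructure="PostgreSQL: Users Table",
    )
    print(cs.to_embedding_string())
    # Goal: Fix parsing bug in date conversion
    # Infrastructure: PostgreSQL: Users Table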