massgen 0.1.0a2__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of massgen might be problematic.

Files changed (111)
  1. massgen/__init__.py +1 -1
  2. massgen/agent_config.py +17 -0
  3. massgen/api_params_handler/_api_params_handler_base.py +1 -0
  4. massgen/api_params_handler/_chat_completions_api_params_handler.py +8 -1
  5. massgen/api_params_handler/_claude_api_params_handler.py +8 -1
  6. massgen/api_params_handler/_gemini_api_params_handler.py +73 -0
  7. massgen/api_params_handler/_response_api_params_handler.py +8 -1
  8. massgen/backend/base.py +31 -0
  9. massgen/backend/{base_with_mcp.py → base_with_custom_tool_and_mcp.py} +282 -11
  10. massgen/backend/chat_completions.py +182 -92
  11. massgen/backend/claude.py +115 -18
  12. massgen/backend/claude_code.py +378 -14
  13. massgen/backend/docs/CLAUDE_API_RESEARCH.md +3 -3
  14. massgen/backend/gemini.py +1275 -1607
  15. massgen/backend/gemini_mcp_manager.py +545 -0
  16. massgen/backend/gemini_trackers.py +344 -0
  17. massgen/backend/gemini_utils.py +43 -0
  18. massgen/backend/response.py +129 -70
  19. massgen/cli.py +643 -132
  20. massgen/config_builder.py +381 -32
  21. massgen/configs/README.md +111 -80
  22. massgen/configs/basic/multi/three_agents_default.yaml +1 -1
  23. massgen/configs/basic/single/single_agent.yaml +1 -1
  24. massgen/configs/providers/openai/gpt5_nano.yaml +3 -3
  25. massgen/configs/tools/custom_tools/claude_code_custom_tool_example.yaml +32 -0
  26. massgen/configs/tools/custom_tools/claude_code_custom_tool_example_no_path.yaml +28 -0
  27. massgen/configs/tools/custom_tools/claude_code_custom_tool_with_mcp_example.yaml +40 -0
  28. massgen/configs/tools/custom_tools/claude_code_custom_tool_with_wrong_mcp_example.yaml +38 -0
  29. massgen/configs/tools/custom_tools/claude_code_wrong_custom_tool_with_mcp_example.yaml +38 -0
  30. massgen/configs/tools/custom_tools/claude_custom_tool_example.yaml +24 -0
  31. massgen/configs/tools/custom_tools/claude_custom_tool_example_no_path.yaml +22 -0
  32. massgen/configs/tools/custom_tools/claude_custom_tool_with_mcp_example.yaml +35 -0
  33. massgen/configs/tools/custom_tools/claude_custom_tool_with_wrong_mcp_example.yaml +33 -0
  34. massgen/configs/tools/custom_tools/claude_wrong_custom_tool_with_mcp_example.yaml +33 -0
  35. massgen/configs/tools/custom_tools/gemini_custom_tool_example.yaml +24 -0
  36. massgen/configs/tools/custom_tools/gemini_custom_tool_example_no_path.yaml +22 -0
  37. massgen/configs/tools/custom_tools/gemini_custom_tool_with_mcp_example.yaml +35 -0
  38. massgen/configs/tools/custom_tools/gemini_custom_tool_with_wrong_mcp_example.yaml +33 -0
  39. massgen/configs/tools/custom_tools/gemini_wrong_custom_tool_with_mcp_example.yaml +33 -0
  40. massgen/configs/tools/custom_tools/github_issue_market_analysis.yaml +94 -0
  41. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example.yaml +24 -0
  42. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_example_no_path.yaml +22 -0
  43. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_mcp_example.yaml +35 -0
  44. massgen/configs/tools/custom_tools/gpt5_nano_custom_tool_with_wrong_mcp_example.yaml +33 -0
  45. massgen/configs/tools/custom_tools/gpt5_nano_wrong_custom_tool_with_mcp_example.yaml +33 -0
  46. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example.yaml +25 -0
  47. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_example_no_path.yaml +23 -0
  48. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_mcp_example.yaml +34 -0
  49. massgen/configs/tools/custom_tools/gpt_oss_custom_tool_with_wrong_mcp_example.yaml +34 -0
  50. massgen/configs/tools/custom_tools/gpt_oss_wrong_custom_tool_with_mcp_example.yaml +34 -0
  51. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example.yaml +24 -0
  52. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_example_no_path.yaml +22 -0
  53. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_mcp_example.yaml +35 -0
  54. massgen/configs/tools/custom_tools/grok3_mini_custom_tool_with_wrong_mcp_example.yaml +33 -0
  55. massgen/configs/tools/custom_tools/grok3_mini_wrong_custom_tool_with_mcp_example.yaml +33 -0
  56. massgen/configs/tools/custom_tools/qwen_api_custom_tool_example.yaml +25 -0
  57. massgen/configs/tools/custom_tools/qwen_api_custom_tool_example_no_path.yaml +23 -0
  58. massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_mcp_example.yaml +36 -0
  59. massgen/configs/tools/custom_tools/qwen_api_custom_tool_with_wrong_mcp_example.yaml +34 -0
  60. massgen/configs/tools/custom_tools/qwen_api_wrong_custom_tool_with_mcp_example.yaml +34 -0
  61. massgen/configs/tools/custom_tools/qwen_local_custom_tool_example.yaml +24 -0
  62. massgen/configs/tools/custom_tools/qwen_local_custom_tool_example_no_path.yaml +22 -0
  63. massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_mcp_example.yaml +35 -0
  64. massgen/configs/tools/custom_tools/qwen_local_custom_tool_with_wrong_mcp_example.yaml +33 -0
  65. massgen/configs/tools/custom_tools/qwen_local_wrong_custom_tool_with_mcp_example.yaml +33 -0
  66. massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +1 -1
  67. massgen/configs/voting/gemini_gpt_voting_sensitivity.yaml +67 -0
  68. massgen/formatter/_chat_completions_formatter.py +104 -0
  69. massgen/formatter/_claude_formatter.py +120 -0
  70. massgen/formatter/_gemini_formatter.py +448 -0
  71. massgen/formatter/_response_formatter.py +88 -0
  72. massgen/frontend/coordination_ui.py +4 -2
  73. massgen/logger_config.py +35 -3
  74. massgen/message_templates.py +56 -6
  75. massgen/orchestrator.py +179 -10
  76. massgen/stream_chunk/base.py +3 -0
  77. massgen/tests/custom_tools_example.py +392 -0
  78. massgen/tests/mcp_test_server.py +17 -7
  79. massgen/tests/test_config_builder.py +423 -0
  80. massgen/tests/test_custom_tools.py +401 -0
  81. massgen/tests/test_tools.py +127 -0
  82. massgen/tool/README.md +935 -0
  83. massgen/tool/__init__.py +39 -0
  84. massgen/tool/_async_helpers.py +70 -0
  85. massgen/tool/_basic/__init__.py +8 -0
  86. massgen/tool/_basic/_two_num_tool.py +24 -0
  87. massgen/tool/_code_executors/__init__.py +10 -0
  88. massgen/tool/_code_executors/_python_executor.py +74 -0
  89. massgen/tool/_code_executors/_shell_executor.py +61 -0
  90. massgen/tool/_exceptions.py +39 -0
  91. massgen/tool/_file_handlers/__init__.py +10 -0
  92. massgen/tool/_file_handlers/_file_operations.py +218 -0
  93. massgen/tool/_manager.py +634 -0
  94. massgen/tool/_registered_tool.py +88 -0
  95. massgen/tool/_result.py +66 -0
  96. massgen/tool/_self_evolution/_github_issue_analyzer.py +369 -0
  97. massgen/tool/docs/builtin_tools.md +681 -0
  98. massgen/tool/docs/exceptions.md +794 -0
  99. massgen/tool/docs/execution_results.md +691 -0
  100. massgen/tool/docs/manager.md +887 -0
  101. massgen/tool/docs/workflow_toolkits.md +529 -0
  102. massgen/tool/workflow_toolkits/__init__.py +57 -0
  103. massgen/tool/workflow_toolkits/base.py +55 -0
  104. massgen/tool/workflow_toolkits/new_answer.py +126 -0
  105. massgen/tool/workflow_toolkits/vote.py +167 -0
  106. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/METADATA +89 -131
  107. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/RECORD +111 -36
  108. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/WHEEL +0 -0
  109. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/entry_points.txt +0 -0
  110. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/licenses/LICENSE +0 -0
  111. {massgen-0.1.0a2.dist-info → massgen-0.1.1.dist-info}/top_level.txt +0 -0
massgen/formatter/_response_formatter.py CHANGED
@@ -241,6 +241,94 @@ class ResponseFormatter(FormatterBase):
 
         return converted_tools
 
+    def format_custom_tools(self, custom_tools: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Convert custom tools from RegisteredToolEntry format to Response API format.
+
+        Custom tools are provided as a dictionary where:
+        - Keys are tool names (str)
+        - Values are RegisteredToolEntry objects with:
+            - tool_name: str
+            - schema_def: dict with structure {"type": "function", "function": {...}}
+            - get_extended_schema: property that returns the schema with extensions
+
+        Response API expects: {"type": "function", "name": ..., "description": ..., "parameters": ...}
+
+        Args:
+            custom_tools: Dictionary of tool_name -> RegisteredToolEntry objects
+
+        Returns:
+            List of tools in Response API format
+        """
+        if not custom_tools:
+            return []
+
+        converted_tools = []
+
+        # Handle dictionary format: {tool_name: RegisteredToolEntry, ...}
+        if isinstance(custom_tools, dict):
+            for tool_name, tool_entry in custom_tools.items():
+                # Check if it's a RegisteredToolEntry object with schema_def
+                if hasattr(tool_entry, "schema_def"):
+                    tool_schema = tool_entry.schema_def
+
+                    # Extract function details from Chat Completions format
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool_entry.tool_name if hasattr(tool_entry, "tool_name") else tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+                # Check if it has get_extended_schema property
+                elif hasattr(tool_entry, "get_extended_schema"):
+                    tool_schema = tool_entry.get_extended_schema
+
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool_entry.tool_name if hasattr(tool_entry, "tool_name") else tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+        # Handle list format for backward compatibility
+        elif isinstance(custom_tools, list):
+            for tool in custom_tools:
+                if hasattr(tool, "schema_def"):
+                    tool_schema = tool.schema_def
+
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool.tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+                elif hasattr(tool, "get_extended_schema"):
+                    tool_schema = tool.get_extended_schema
+
+                    if tool_schema.get("type") == "function" and "function" in tool_schema:
+                        func = tool_schema["function"]
+                        converted_tools.append(
+                            {
+                                "type": "function",
+                                "name": func.get("name", tool.tool_name),
+                                "description": func.get("description", ""),
+                                "parameters": func.get("parameters", {}),
+                            },
+                        )
+
+        return converted_tools
+
     def format_mcp_tools(self, mcp_functions: Dict[str, Any]) -> List[Dict[str, Any]]:
         """Convert MCP tools to Response API format (OpenAI function declarations)."""
         if not mcp_functions:
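
For reference, a minimal sketch of the conversion format_custom_tools performs. The "add_numbers" tool and its schema below are hypothetical; only the Chat Completions-style input shape and the flattened Response API output shape are taken from the docstring above.

# Hypothetical RegisteredToolEntry.schema_def in Chat Completions format:
chat_completions_schema = {
    "type": "function",
    "function": {
        "name": "add_numbers",
        "description": "Add two numbers.",
        "parameters": {
            "type": "object",
            "properties": {"a": {"type": "number"}, "b": {"type": "number"}},
            "required": ["a", "b"],
        },
    },
}

# Shape produced by format_custom_tools for the Response API: name, description,
# and parameters are hoisted out of the nested "function" object.
response_api_tool = {
    "type": "function",
    "name": "add_numbers",
    "description": "Add two numbers.",
    "parameters": chat_completions_schema["function"]["parameters"],
}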
massgen/frontend/coordination_ui.py CHANGED
@@ -315,7 +315,8 @@ class CoordinationUI:
             # time.sleep(1.0)
 
         # Get final presentation from winning agent
-        if self.enable_final_presentation and selected_agent and vote_results.get("vote_counts"):
+        # Run final presentation if enabled and there's a selected agent (regardless of votes)
+        if self.enable_final_presentation and selected_agent:
             # Don't print - let the display handle it
             # print(f"\n🎤 Final Presentation from {selected_agent}:")
             # print("=" * 60)
@@ -691,7 +692,8 @@ class CoordinationUI:
             # time.sleep(1.0)
 
         # Get final presentation from winning agent
-        if self.enable_final_presentation and selected_agent and vote_results.get("vote_counts"):
+        # Run final presentation if enabled and there's a selected agent (regardless of votes)
+        if self.enable_final_presentation and selected_agent:
             # Don't print - let the display handle it
             # print(f"\n🎤 Final Presentation from {selected_agent}:")
             # print("=" * 60)
massgen/logger_config.py CHANGED
@@ -16,13 +16,21 @@ Color Scheme for Debug Logging:
 """
 
 import inspect
+import subprocess
 import sys
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Optional
 
+import yaml
 from loguru import logger
 
+# Try to import massgen for version info (optional)
+try:
+    import massgen
+except ImportError:
+    massgen = None
+
 # Remove default logger to have full control
 logger.remove()
 
@@ -93,7 +101,12 @@ def get_log_session_dir(turn: Optional[int] = None) -> Path:
     return _LOG_SESSION_DIR
 
 
-def save_execution_metadata(query: str, config_path: Optional[str] = None, config_content: Optional[dict] = None):
+def save_execution_metadata(
+    query: str,
+    config_path: Optional[str] = None,
+    config_content: Optional[dict] = None,
+    cli_args: Optional[dict] = None,
+):
     """Save the query and config metadata to the log directory.
 
     This allows reconstructing what was executed in this session.
@@ -102,9 +115,8 @@ def save_execution_metadata(query: str, config_path: Optional[str] = None, confi
         query: The user's query/prompt
         config_path: Path to the config file that was used (optional)
         config_content: The actual config dictionary (optional)
+        cli_args: Command line arguments as dict (optional)
     """
-    import yaml
-
     log_dir = get_log_session_dir()
 
     # Create a single metadata file with all execution info
@@ -119,6 +131,26 @@ def save_execution_metadata(query: str, config_path: Optional[str] = None, confi
     if config_content:
         metadata["config"] = config_content
 
+    if cli_args:
+        metadata["cli_args"] = cli_args
+
+    # Try to get git information if in a git repository
+    try:
+        git_commit = subprocess.check_output(["git", "rev-parse", "HEAD"], stderr=subprocess.DEVNULL, text=True).strip()
+        git_branch = subprocess.check_output(["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=subprocess.DEVNULL, text=True).strip()
+        metadata["git"] = {"commit": git_commit, "branch": git_branch}
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        # Not in a git repo or git not available
+        pass
+
+    # Add Python version and package version
+    metadata["python_version"] = sys.version
+    if massgen is not None:
+        metadata["massgen_version"] = getattr(massgen, "__version__", "unknown")
+
+    # Add working directory
+    metadata["working_directory"] = str(Path.cwd())
+
     metadata_file = log_dir / "execution_metadata.yaml"
     try:
         with open(metadata_file, "w", encoding="utf-8") as f:
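
A minimal usage sketch of the extended metadata capture. The query, config path, and cli_args values below are illustrative; the recorded keys follow the code above.

from massgen.logger_config import save_execution_metadata

# Hypothetical call from the CLI layer (cli_args would typically be vars(parsed_args)).
save_execution_metadata(
    query="Summarize the latest release notes",
    config_path="massgen/configs/basic/single/single_agent.yaml",
    cli_args={"backend": "openai", "debug": True},
)
# execution_metadata.yaml now also records cli_args, git commit/branch (when available),
# python_version, massgen_version, and the working directory.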
massgen/message_templates.py CHANGED
@@ -10,8 +10,22 @@ from typing import Any, Dict, List, Optional
 class MessageTemplates:
     """Message templates implementing the proven MassGen approach."""
 
-    def __init__(self, **template_overrides):
-        """Initialize with optional template overrides."""
+    def __init__(self, voting_sensitivity: str = "lenient", answer_novelty_requirement: str = "lenient", **template_overrides):
+        """Initialize with optional template overrides.
+
+        Args:
+            voting_sensitivity: Controls how critical agents are when voting.
+                - "lenient": Agents vote YES more easily, fewer new answers (default)
+                - "balanced": Agents apply detailed criteria (comprehensive, accurate, complete?)
+                - "strict": Agents apply high standards of excellence (all aspects, edge cases, reference-quality)
+            answer_novelty_requirement: Controls how different new answers must be.
+                - "lenient": No additional checks (default)
+                - "balanced": Require meaningful differences
+                - "strict": Require substantially different solutions
+            **template_overrides: Custom template strings to override defaults
+        """
+        self._voting_sensitivity = voting_sensitivity
+        self._answer_novelty_requirement = answer_novelty_requirement
         self._template_overrides = template_overrides
 
     # =============================================================================
@@ -57,14 +71,50 @@ class MessageTemplates:
 #     Make sure you actually call `vote` or `new_answer` (in tool call format).
 #
 #     *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
+        # Determine evaluation criteria based on voting sensitivity
+        if self._voting_sensitivity == "strict":
+            evaluation_section = """Does the best CURRENT ANSWER address the ORIGINAL MESSAGE exceptionally well? Consider:
+- Is it comprehensive, addressing ALL aspects and edge cases?
+- Is it technically accurate and well-reasoned?
+- Does it provide clear explanations and proper justification?
+- Is it complete with no significant gaps or weaknesses?
+- Could it serve as a reference-quality solution?
+
+Only use the `vote` tool if the best answer meets high standards of excellence."""
+        elif self._voting_sensitivity == "balanced":
+            evaluation_section = """Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well? Consider:
+- Is it comprehensive, accurate, and complete?
+- Could it be meaningfully improved, refined, or expanded?
+- Are there weaknesses, gaps, or better approaches?
+
+Only use the `vote` tool if the best answer is strong and complete."""
+        else:
+            # Default to lenient (including explicit "lenient" or any other value)
+            evaluation_section = """Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well?
+
+If YES, use the `vote` tool to record your vote and skip the `new_answer` tool."""
+
+        # Add novelty requirement instructions if not lenient
+        novelty_section = ""
+        if self._answer_novelty_requirement == "balanced":
+            novelty_section = """
+IMPORTANT: If you provide a new answer, it must be meaningfully different from existing answers.
+- Don't just rephrase or reword existing solutions
+- Introduce new insights, approaches, or tools
+- Make substantive improvements, not cosmetic changes"""
+        elif self._answer_novelty_requirement == "strict":
+            novelty_section = """
+CRITICAL: New answers must be SUBSTANTIALLY different from existing answers.
+- Use a fundamentally different approach or methodology
+- Employ different tools or techniques
+- Provide significantly more depth or novel perspectives
+- If you cannot provide a truly novel solution, vote instead"""
 
         return f"""You are evaluating answers from multiple agents for final response to a message.
 Different agents may have different builtin tools and capabilities.
-Does the best CURRENT ANSWER address the ORIGINAL MESSAGE well?
-
-If YES, use the `vote` tool to record your vote and skip the `new_answer` tool.
+{evaluation_section}
 Otherwise, digest existing answers, combine their strengths, and do additional work to address their weaknesses,
-then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.
+then use the `new_answer` tool to record a better answer to the ORIGINAL MESSAGE.{novelty_section}
 Make sure you actually call `vote` or `new_answer` (in tool call format).
 
 *Note*: The CURRENT TIME is **{time.strftime("%Y-%m-%d %H:%M:%S")}**."""
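
A minimal sketch of setting the new knobs programmatically; the values are illustrative, and the configs/voting/gemini_gpt_voting_sensitivity.yaml file added in this release shows the config-driven route.

from massgen.message_templates import MessageTemplates

# Stricter voting with a balanced novelty requirement; both parameters default to "lenient".
templates = MessageTemplates(
    voting_sensitivity="strict",
    answer_novelty_requirement="balanced",
)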
massgen/orchestrator.py CHANGED
@@ -44,6 +44,7 @@ from .logger_config import (
 )
 from .message_templates import MessageTemplates
 from .stream_chunk import ChunkType
+from .tool import get_workflow_tools
 from .utils import ActionType, AgentStatus, CoordinationStage
 
 
@@ -137,9 +138,16 @@ class Orchestrator(ChatAgent):
         self.config = config or AgentConfig.create_openai_config()
 
         # Get message templates from config
-        self.message_templates = self.config.message_templates or MessageTemplates()
-        # Create workflow tools for agents (vote and new_answer)
-        self.workflow_tools = self.message_templates.get_standard_tools(list(agents.keys()))
+        self.message_templates = self.config.message_templates or MessageTemplates(
+            voting_sensitivity=self.config.voting_sensitivity,
+            answer_novelty_requirement=self.config.answer_novelty_requirement,
+        )
+        # Create workflow tools for agents (vote and new_answer) using new toolkit system
+        self.workflow_tools = get_workflow_tools(
+            valid_agent_ids=list(agents.keys()),
+            template_overrides=getattr(self.message_templates, "_template_overrides", {}),
+            api_format="chat_completions",  # Default format, will be overridden per backend
+        )
 
         # MassGen-specific state
         self.current_task: Optional[str] = None
@@ -841,8 +849,8 @@
         # Generate single timestamp for answer/vote and workspace
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
 
-        # Save answer if provided
-        if answer_content:
+        # Save answer if provided (or create final directory structure even if empty)
+        if answer_content is not None or is_final:
             try:
                 log_session_dir = get_log_session_dir()
                 if log_session_dir:
@@ -855,8 +863,9 @@
                     timestamped_dir.mkdir(parents=True, exist_ok=True)
                     answer_file = timestamped_dir / "answer.txt"
 
-                    # Write the answer content
-                    answer_file.write_text(answer_content)
+                    # Write the answer content (even if empty for final snapshots)
+                    content_to_write = answer_content if answer_content is not None else ""
+                    answer_file.write_text(content_to_write)
                     logger.info(f"[Orchestrator._save_agent_snapshot] Saved answer to {answer_file}")
 
             except Exception as e:
@@ -935,7 +944,7 @@
             logger.info(f"[Orchestrator._save_agent_snapshot] Agent {agent_id} does not have filesystem_manager")
 
         # Save context if provided (unified context saving)
-        if context_data and (answer_content or vote_data):
+        if context_data:
             try:
                 log_session_dir = get_log_session_dir()
                 if log_session_dir:
@@ -944,6 +953,8 @@
                 else:
                     timestamped_dir = log_session_dir / agent_id / timestamp
 
+                # Ensure directory exists (may not have been created if no answer/vote)
+                timestamped_dir.mkdir(parents=True, exist_ok=True)
                 context_file = timestamped_dir / "context.txt"
 
                 # Handle different types of context data
@@ -1122,6 +1133,91 @@
     #     # Implementation will check against PermissionManager
     #     pass
 
+    def _calculate_jaccard_similarity(self, text1: str, text2: str) -> float:
+        """Calculate Jaccard similarity between two texts based on word tokens.
+
+        Args:
+            text1: First text to compare
+            text2: Second text to compare
+
+        Returns:
+            Similarity score between 0.0 and 1.0
+        """
+        # Tokenize and normalize - simple word-based approach
+        words1 = set(text1.lower().split())
+        words2 = set(text2.lower().split())
+
+        if not words1 and not words2:
+            return 1.0  # Both empty, consider identical
+        if not words1 or not words2:
+            return 0.0  # One empty, one not
+
+        intersection = len(words1 & words2)
+        union = len(words1 | words2)
+
+        return intersection / union if union > 0 else 0.0
+
+    def _check_answer_novelty(self, new_answer: str, existing_answers: Dict[str, str]) -> tuple[bool, Optional[str]]:
+        """Check if a new answer is sufficiently different from existing answers.
+
+        Args:
+            new_answer: The proposed new answer
+            existing_answers: Dictionary of existing answers {agent_id: answer_content}
+
+        Returns:
+            Tuple of (is_novel, error_message). is_novel=True if answer passes novelty check.
+        """
+        # Lenient mode: no checks (current behavior)
+        if self.config.answer_novelty_requirement == "lenient":
+            return (True, None)
+
+        # Determine threshold based on setting
+        if self.config.answer_novelty_requirement == "strict":
+            threshold = 0.50  # Reject if >50% overlap (strict)
+            error_msg = (
+                "Your answer is too similar to existing answers (>50% overlap). Please use a fundamentally different approach, employ different tools/techniques, or vote for an existing answer."
+            )
+        else:  # balanced
+            threshold = 0.70  # Reject if >70% overlap (balanced)
+            error_msg = (
+                "Your answer is too similar to existing answers (>70% overlap). "
+                "Please provide a meaningfully different solution with new insights, "
+                "approaches, or tools, or vote for an existing answer."
+            )
+
+        # Check similarity against all existing answers
+        for agent_id, existing_answer in existing_answers.items():
+            similarity = self._calculate_jaccard_similarity(new_answer, existing_answer)
+            if similarity > threshold:
+                logger.info(f"[Orchestrator] Answer rejected: {similarity:.2%} similar to {agent_id}'s answer (threshold: {threshold:.0%})")
+                return (False, error_msg)
+
+        # Answer is sufficiently novel
+        return (True, None)
+
+    def _check_answer_count_limit(self, agent_id: str) -> tuple[bool, Optional[str]]:
+        """Check if agent has reached their answer count limit.
+
+        Args:
+            agent_id: The agent attempting to provide a new answer
+
+        Returns:
+            Tuple of (can_answer, error_message). can_answer=True if agent can provide another answer.
+        """
+        # No limit set
+        if self.config.max_new_answers_per_agent is None:
+            return (True, None)
+
+        # Count how many answers this agent has provided
+        answer_count = len(self.coordination_tracker.answers_by_agent.get(agent_id, []))
+
+        if answer_count >= self.config.max_new_answers_per_agent:
+            error_msg = f"You've reached the maximum of {self.config.max_new_answers_per_agent} new answer(s). Please vote for the best existing answer using the `vote` tool."
+            logger.info(f"[Orchestrator] Answer rejected: {agent_id} has reached limit ({answer_count}/{self.config.max_new_answers_per_agent})")
+            return (False, error_msg)
+
+        return (True, None)
+
     def _create_tool_error_messages(
         self,
         agent: "ChatAgent",
@@ -1443,6 +1539,10 @@
                 # Forward MCP status messages with proper formatting
                 mcp_content = f"🔧 MCP: {chunk.content}"
                 yield ("content", mcp_content)
+            elif chunk_type == "custom_tool_status":
+                # Forward custom tool status messages with proper formatting
+                custom_tool_content = f"🔧 Custom Tool: {chunk.content}"
+                yield ("content", custom_tool_content)
             elif chunk_type == "debug":
                 # Forward debug chunks
                 yield ("debug", chunk.content)
@@ -1660,6 +1760,54 @@
                     # Agent provided new answer
                     content = tool_args.get("content", response_text.strip())
 
+                    # Check answer count limit
+                    can_answer, count_error = self._check_answer_count_limit(agent_id)
+                    if not can_answer:
+                        if attempt < max_attempts - 1:
+                            if self._check_restart_pending(agent_id):
+                                await self._save_partial_work_on_restart(agent_id)
+                                yield (
+                                    "content",
+                                    f"🔁 [{agent_id}] gracefully restarting due to new answer detected\n",
+                                )
+                                yield ("done", None)
+                                return
+                            yield ("content", f"❌ {count_error}")
+                            # Create proper tool error message for retry
+                            enforcement_msg = self._create_tool_error_messages(agent, [tool_call], count_error)
+                            continue
+                        else:
+                            yield (
+                                "error",
+                                f"Answer count limit reached after {max_attempts} attempts",
+                            )
+                            yield ("done", None)
+                            return
+
+                    # Check answer novelty (similarity to existing answers)
+                    is_novel, novelty_error = self._check_answer_novelty(content, answers)
+                    if not is_novel:
+                        if attempt < max_attempts - 1:
+                            if self._check_restart_pending(agent_id):
+                                await self._save_partial_work_on_restart(agent_id)
+                                yield (
+                                    "content",
+                                    f"🔁 [{agent_id}] gracefully restarting due to new answer detected\n",
+                                )
+                                yield ("done", None)
+                                return
+                            yield ("content", f"❌ {novelty_error}")
+                            # Create proper tool error message for retry
+                            enforcement_msg = self._create_tool_error_messages(agent, [tool_call], novelty_error)
+                            continue
+                        else:
+                            yield (
+                                "error",
+                                f"Answer novelty requirement not met after {max_attempts} attempts",
+                            )
+                            yield ("done", None)
+                            return
+
                     # Check for duplicate answer
                     # Normalize both new content and existing content to neutral paths for comparison
                     normalized_new_content = self._normalize_workspace_paths_for_comparison(content)
@@ -1695,6 +1843,9 @@
                     return
                 elif tool_name.startswith("mcp"):
                     pass
+                elif tool_name.startswith("custom_tool"):
+                    # Custom tools are handled by the backend and their results are streamed separately
+                    pass
                 else:
                     # Non-workflow tools not yet implemented
                     yield (
@@ -2056,6 +2207,7 @@
 
         # Use agent's chat method with proper system message (reset chat for clean presentation)
         presentation_content = ""
+        final_snapshot_saved = False  # Track whether snapshot was saved during stream
 
         try:
             # Track final round iterations (each chunk is like an iteration)
@@ -2121,6 +2273,9 @@
                     # Track the final answer in coordination tracker
                     self.coordination_tracker.set_final_answer(selected_agent_id, final_answer, snapshot_timestamp="final")
 
+                    # Mark snapshot as saved
+                    final_snapshot_saved = True
+
                     log_stream_chunk("orchestrator", "done", None, selected_agent_id)
                     yield StreamChunk(type="done", source=selected_agent_id)
                 elif chunk_type == "error":
@@ -2139,7 +2294,7 @@
                         type=chunk_type,
                         content=getattr(chunk, "content", ""),
                         source=selected_agent_id,
-                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source"]},
+                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
                    )
                 else:
                     log_stream_chunk(
@@ -2152,10 +2307,24 @@
                         type=chunk_type,
                         content=getattr(chunk, "content", ""),
                         source=selected_agent_id,
-                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source"]},
+                        **{k: v for k, v in chunk.__dict__.items() if k not in ["type", "content", "source", "timestamp", "sequence_number"]},
                     )
 
         finally:
+            # Ensure final snapshot is always saved (even if "done" chunk wasn't yielded)
+            if not final_snapshot_saved:
+                final_answer = presentation_content.strip() if presentation_content.strip() else self.agent_states[selected_agent_id].answer
+                final_context = self.get_last_context(selected_agent_id)
+                await self._save_agent_snapshot(
+                    self._selected_agent,
+                    answer_content=final_answer,
+                    is_final=True,
+                    context_data=final_context,
+                )
+
+                # Track the final answer in coordination tracker
+                self.coordination_tracker.set_final_answer(selected_agent_id, final_answer, snapshot_timestamp="final")
+
             # Store the final presentation content for logging
             if presentation_content.strip():
                 # Store the synthesized final answer
massgen/stream_chunk/base.py CHANGED
@@ -33,6 +33,9 @@ class ChunkType(Enum):
     # MCP-related chunks
     MCP_STATUS = "mcp_status"
 
+    # Custom tool chunks
+    CUSTOM_TOOL_STATUS = "custom_tool_status"
+
     # Multimodal chunks
     MEDIA = "media"
     MEDIA_PROGRESS = "media_progress"
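
To illustrate how the new chunk type surfaces downstream, a small sketch; the status text is hypothetical, and the "🔧 Custom Tool:" prefix mirrors the orchestrator hunk above.

from massgen.stream_chunk import ChunkType

# A backend can tag custom-tool progress with the new enum member...
assert ChunkType.CUSTOM_TOOL_STATUS.value == "custom_tool_status"

# ...and the orchestrator forwards it to the UI with a prefix, as shown in the orchestrator diff:
status_text = "executing add_numbers"  # hypothetical status message from a custom tool
forwarded = f"🔧 Custom Tool: {status_text}"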