PyPI - quantalogic - Versions diffs - 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl - Mend

quantalogic 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

quantalogic/__init__.py +3 -2
quantalogic/agent.py +94 -38
quantalogic/agent_config.py +62 -14
quantalogic/coding_agent.py +17 -2
quantalogic/{print_event.py → console_print_events.py} +1 -3
quantalogic/console_print_token.py +16 -0
quantalogic/docs_cli.py +50 -0
quantalogic/generative_model.py +86 -77
quantalogic/main.py +128 -18
quantalogic/prompts.py +2 -2
quantalogic/search_agent.py +12 -1
quantalogic/server/agent_server.py +2 -2
quantalogic/tools/llm_tool.py +52 -11
quantalogic/tools/llm_vision_tool.py +23 -7
quantalogic/xml_parser.py +109 -49
{quantalogic-0.2.16.dist-info → quantalogic-0.2.18.dist-info}/METADATA +62 -153
{quantalogic-0.2.16.dist-info → quantalogic-0.2.18.dist-info}/RECORD +20 -18
quantalogic-0.2.18.dist-info/entry_points.txt +6 -0
quantalogic-0.2.16.dist-info/entry_points.txt +0 -3
{quantalogic-0.2.16.dist-info → quantalogic-0.2.18.dist-info}/LICENSE +0 -0
{quantalogic-0.2.16.dist-info → quantalogic-0.2.18.dist-info}/WHEEL +0 -0

quantalogic/__init__.py CHANGED Viewed

@@ -11,10 +11,11 @@ warnings.filterwarnings(
 from .agent import Agent  # noqa: E402
+from .console_print_events import console_print_events  # noqa: E402
+from .console_print_token import console_print_token  # noqa: E402
 from .event_emitter import EventEmitter  # noqa: E402
 from .memory import AgentMemory, VariableMemory  # noqa: E402
-from .print_event import console_print_events  # noqa: E402
 """QuantaLogic package for AI-powered generative models."""
-__all__ = ["Agent", "EventEmitter", "AgentMemory", "VariableMemory", "console_print_events"]
+__all__ = ["Agent", "EventEmitter", "AgentMemory", "VariableMemory", "console_print_events","console_print_token"]

quantalogic/agent.py CHANGED Viewed

@@ -8,7 +8,7 @@ from loguru import logger
 from pydantic import BaseModel, ConfigDict
 from quantalogic.event_emitter import EventEmitter
-from quantalogic.generative_model import GenerativeModel
+from quantalogic.generative_model import GenerativeModel, ResponseStats, TokenUsage
 from quantalogic.memory import AgentMemory, Message, VariableMemory
 from quantalogic.prompts import system_prompt
 from quantalogic.tool_manager import ToolManager
@@ -71,6 +71,8 @@ class Agent(BaseModel):
     max_output_tokens: int = DEFAULT_MAX_OUTPUT_TOKENS
     max_iterations: int = 30
     system_prompt: str = ""
+    compact_every_n_iterations: int | None = None  # Add this to the class attributes
+    max_tokens_working_memory: int | None = None  # Add max_tokens_working_memory attribute
     def __init__(
         self,
@@ -81,10 +83,15 @@ class Agent(BaseModel):
         task_to_solve: str = "",
         specific_expertise: str = "General AI assistant with coding and problem-solving capabilities",
         get_environment: Callable[[], str] = get_environment,
+        compact_every_n_iterations: int | None = None,  # New parameter
+        max_tokens_working_memory: int | None = None,  # New parameter to set max working memory tokens
     ):
         """Initialize the agent with model, memory, tools, and configurations."""
         try:
             logger.debug("Initializing agent...")
+            # Create event emitter first
+            event_emitter = EventEmitter()
             # Add TaskCompleteTool to the tools list if not already present
             if TaskCompleteTool() not in tools:
                 tools.append(TaskCompleteTool())
@@ -108,7 +115,7 @@ class Agent(BaseModel):
             logger.debug("Base class init started ...")
             super().__init__(
-                model=GenerativeModel(model=model_name),
+                model=GenerativeModel(model=model_name, event_emitter=event_emitter),
                 memory=memory,
                 variable_store=VariableMemory(),
                 tools=tool_manager,
@@ -116,19 +123,30 @@ class Agent(BaseModel):
                 ask_for_user_validation=ask_for_user_validation,
                 task_to_solve=task_to_solve,
                 specific_expertise=specific_expertise,
+                event_emitter=event_emitter,
             )
+            # Set the new compact_every_n_iterations parameter
+            self.compact_every_n_iterations = compact_every_n_iterations or self.max_iterations
+            logger.debug(f"Memory will be compacted every {self.compact_every_n_iterations} iterations")
+            # Set the max_tokens_working_memory parameter
+            self.max_tokens_working_memory = max_tokens_working_memory
+            logger.debug(f"Max tokens for working memory set to: {self.max_tokens_working_memory}")
             logger.debug("Agent initialized successfully.")
         except Exception as e:
             logger.error(f"Failed to initialize agent: {str(e)}")
             raise
-    def solve_task(self, task: str, max_iterations: int = 30) -> str:
+    def solve_task(self, task: str, max_iterations: int = 30, streaming: bool = False) -> str:
         """Solve the given task using the ReAct framework.
         Args:
             task (str): The task description.
             max_iterations (int, optional): Maximum number of iterations to attempt solving the task.
                 Defaults to 30 to prevent infinite loops and ensure timely task completion.
+            streaming (bool, optional): Whether to use streaming mode for generating responses.
         Returns:
             str: The final response after task completion.
@@ -172,11 +190,34 @@ class Agent(BaseModel):
                 self._compact_memory_if_needed(current_prompt)
-                result = self.model.generate_with_history(messages_history=self.memory.memory, prompt=current_prompt)
+                if streaming:
+                    # For streaming, collect the response chunks
+                    content = ""
+                    for chunk in self.model.generate_with_history(
+                        messages_history=self.memory.memory, prompt=current_prompt, streaming=True
+                    ):
+                        content += chunk
+                    # Create a response object similar to non-streaming mode
+                    result = ResponseStats(
+                        response=content,
+                        usage=TokenUsage(
+                            prompt_tokens=0,  # We don't have token counts in streaming mode
+                            completion_tokens=0,
+                            total_tokens=0,
+                        ),
+                        model=self.model.model,
+                        finish_reason="stop",
+                    )
+                else:
+                    result = self.model.generate_with_history(
+                        messages_history=self.memory.memory, prompt=current_prompt, streaming=False
+                    )
                 content = result.response
-                token_usage = result.usage
-                self.total_tokens = token_usage.total_tokens
+                if not streaming:  # Only update tokens for non-streaming mode
+                    token_usage = result.usage
+                    self.total_tokens = token_usage.total_tokens
                 # Emit event: Task Think End
                 self._emit_event(
@@ -187,7 +228,7 @@ class Agent(BaseModel):
                 )
                 # Process the assistant's response
-                result = self._observe_response(result.response, iteration=self.current_iteration)
+                result = self._observe_response(content, iteration=self.current_iteration)
                 current_prompt = result.next_prompt
@@ -237,9 +278,31 @@ class Agent(BaseModel):
         self.total_tokens = self.model.token_counter_with_history(message_history, prompt)
     def _compact_memory_if_needed(self, current_prompt: str = ""):
-        """Compacts the memory if it exceeds the maximum occupancy."""
+        """Compacts the memory if it exceeds the maximum occupancy or token limit."""
         ratio_occupied = self._calculate_context_occupancy()
-        if ratio_occupied >= MAX_OCCUPANCY:
+        # Compact memory if any of these conditions are met:
+        # 1. Memory occupancy exceeds MAX_OCCUPANCY, or
+        # 2. Current iteration is a multiple of compact_every_n_iterations, or
+        # 3. Working memory exceeds max_tokens_working_memory (if set)
+        should_compact_by_occupancy = ratio_occupied >= MAX_OCCUPANCY
+        should_compact_by_iteration = (
+            self.compact_every_n_iterations is not None and
+            self.current_iteration > 0 and
+            self.current_iteration % self.compact_every_n_iterations == 0
+        )
+        should_compact_by_token_limit = (
+            self.max_tokens_working_memory is not None and
+            self.total_tokens > self.max_tokens_working_memory
+        )
+        if should_compact_by_occupancy or should_compact_by_iteration or should_compact_by_token_limit:
+            if should_compact_by_occupancy:
+                logger.debug(f"Memory compaction triggered: Occupancy {ratio_occupied}% exceeds {MAX_OCCUPANCY}%")
+            if should_compact_by_iteration:
+                logger.debug(f"Memory compaction triggered: Iteration {self.current_iteration} is a multiple of {self.compact_every_n_iterations}")
             self._emit_event("memory_full")
             self.memory.compact()
             self.total_tokens = self.model.token_counter_with_history(self.memory.memory, current_prompt)
@@ -292,9 +355,10 @@ class Agent(BaseModel):
                 is_repeated_call = self._is_repeated_tool_call(tool_name, arguments_with_values)
                 if is_repeated_call:
-                    return self._handle_repeated_tool_call(tool_name, arguments_with_values)
+                    executed_tool, response = self._handle_repeated_tool_call(tool_name, arguments_with_values)
+                else:
+                    executed_tool, response = self._execute_tool(tool_name, tool, arguments_with_values)
-                executed_tool, response = self._execute_tool(tool_name, tool, arguments_with_values)
                 if not executed_tool:
                     return self._handle_tool_execution_failure(response)
@@ -427,34 +491,26 @@ class Agent(BaseModel):
         # Format the response message
         formatted_response = (
-            "\n"
-            f"--- Observations for iteration {iteration} / max {self.max_iterations} ---\n"
-            "\n"
-            f"\n --- Tool execution result stored in variable ${variable_name}$ --- \n"
-            "\n"
-            f"<{variable_name}>\n{response_display}\n</{variable_name}>\n" + "\n"
-            "\n"
-            f"--- Tools --- \n"
-            "\n"
-            f"{self._get_tools_names_prompt()}"
-            "\n"
-            f"--- Variables --- \n"
-            "\n"
-            f"{self._get_variable_prompt()}\n"
-            "\n"
-            "You must analyze this answer and evaluate what to do next to solve the task.\n"
-            "If the step failed, take a step back and rethink your approach.\n"
-            "\n"
-            "--- Task to solve summary ---\n"
-            "\n"
-            f"{self.task_to_solve_summary}"
-            "\n"
+            f"\n--- Observations for iteration {iteration} / max {self.max_iterations} ---\n"
+            f"\n--- Tool execution result in ${variable_name}$ ---\n"
+            f"<{variable_name}>\n{response_display}\n</{variable_name}>\n\n"
+            f"--- Tools ---\n{self._get_tools_names_prompt()}\n"
+            f"--- Variables ---\n{self._get_variable_prompt()}\n"
+            "Analyze this response to determine the next steps. If the step failed, reconsider your approach.\n"
+            f"--- Task to solve summary ---\n{self.task_to_solve_summary}\n"
             "--- Format ---\n"
-            "\n"
-            "You MUST respond with exactly two XML blocks formatted in markdown:\n"
-            "\n"
-            " - One <thinking> block detailing your analysis,\n"
-            " - One <tool_name> block specifying the chosen tool and its arguments, as outlined in the system prompt.\n"
+            "Respond only with two XML blocks in markdown as specified in system prompt.\n"
+            "No extra comments must be added.\n"
+            "```xml\n"
+            "<thinking>\n"
+            "...\n"
+            "</thinking>\n"
+            "```\n"
+            "```xml\n"
+            "< ...tool_name... >\n"
+            "...\n"
+            "</ ...tool_name... >\n"
+            "```"
         )
         return formatted_response

quantalogic/agent_config.py CHANGED Viewed

@@ -5,9 +5,11 @@
 # Local application imports
 from quantalogic.agent import Agent
 from quantalogic.coding_agent import create_coding_agent
+from quantalogic.console_print_token import console_print_token
 from quantalogic.tools import (
     AgentTool,
     DownloadHttpFileTool,
+    DuckDuckGoSearchTool,
     EditWholeContentTool,
     ExecuteBashCommandTool,
     InputQuestionTool,
@@ -23,20 +25,28 @@ from quantalogic.tools import (
     RipgrepTool,
     SearchDefinitionNames,
     TaskCompleteTool,
-    WriteFileTool,
-    DuckDuckGoSearchTool,
     WikipediaSearchTool,
+    WriteFileTool,
 )
 MODEL_NAME = "deepseek/deepseek-chat"
-def create_agent(model_name: str, vision_model_name: str | None) -> Agent:
+def create_agent(
+    model_name: str,
+    vision_model_name: str | None,
+    no_stream: bool = False,
+    compact_every_n_iteration: int | None = None,
+    max_tokens_working_memory: int | None = None
+) -> Agent:
     """Create an agent with the specified model and tools.
     Args:
         model_name (str): Name of the model to use
         vision_model_name (str | None): Name of the vision model to use
+        no_stream (bool, optional): If True, the agent will not stream results.
+        compact_every_n_iteration (int | None, optional): Frequency of memory compaction.
+        max_tokens_working_memory (int | None, optional): Maximum tokens for working memory.
     Returns:
         Agent: An agent with the specified model and tools
@@ -54,25 +64,36 @@ def create_agent(model_name: str, vision_model_name: str | None) -> Agent:
         RipgrepTool(),
         SearchDefinitionNames(),
         MarkitdownTool(),
-        LLMTool(model_name=model_name),
+        LLMTool(model_name=model_name, on_token=console_print_token if not no_stream else None),
         DownloadHttpFileTool(),
     ]
     if vision_model_name:
-        tools.append(LLMVisionTool(model_name=vision_model_name))
+        tools.append(LLMVisionTool(model_name=vision_model_name, on_token=console_print_token if not no_stream else None))
     return Agent(
         model_name=model_name,
         tools=tools,
+        compact_every_n_iterations=compact_every_n_iteration,
+        max_tokens_working_memory=max_tokens_working_memory,
     )
-def create_interpreter_agent(model_name: str, vision_model_name: str | None) -> Agent:
+def create_interpreter_agent(
+    model_name: str,
+    vision_model_name: str | None,
+    no_stream: bool = False,
+    compact_every_n_iteration: int | None = None,
+    max_tokens_working_memory: int | None = None
+) -> Agent:
     """Create an interpreter agent with the specified model and tools.
     Args:
         model_name (str): Name of the model to use
         vision_model_name (str | None): Name of the vision model to use
+        no_stream (bool, optional): If True, the agent will not stream results.
+        compact_every_n_iteration (int | None, optional): Frequency of memory compaction.
+        max_tokens_working_memory (int | None, optional): Maximum tokens for working memory.
     Returns:
         Agent: An interpreter agent with the specified model and tools
@@ -92,18 +113,32 @@ def create_interpreter_agent(model_name: str, vision_model_name: str | None) ->
         NodeJsTool(),
         SearchDefinitionNames(),
         MarkitdownTool(),
-        LLMTool(model_name=model_name),
+        LLMTool(model_name=model_name, on_token=console_print_token if not no_stream else None),
         DownloadHttpFileTool(),
     ]
-    return Agent(model_name=model_name, tools=tools)
+    return Agent(
+        model_name=model_name,
+        tools=tools,
+        compact_every_n_iterations=compact_every_n_iteration,
+        max_tokens_working_memory=max_tokens_working_memory,
+    )
-def create_full_agent(model_name: str, vision_model_name: str | None) -> Agent:
+def create_full_agent(
+    model_name: str,
+    vision_model_name: str | None,
+    no_stream: bool = False,
+    compact_every_n_iteration: int | None = None,
+    max_tokens_working_memory: int | None = None
+) -> Agent:
     """Create an agent with the specified model and many tools.
     Args:
         model_name (str): Name of the model to use
         vision_model_name (str | None): Name of the vision model to use
+        no_stream (bool, optional): If True, the agent will not stream results.
+        compact_every_n_iteration (int | None, optional): Frequency of memory compaction.
+        max_tokens_working_memory (int | None, optional): Maximum tokens for working memory.
     Returns:
         Agent: An agent with the specified model and tools
@@ -124,27 +159,38 @@ def create_full_agent(model_name: str, vision_model_name: str | None) -> Agent:
         NodeJsTool(),
         SearchDefinitionNames(),
         MarkitdownTool(),
-        LLMTool(model_name=model_name),
+        LLMTool(model_name=model_name, on_token=console_print_token if not no_stream else None),
         DownloadHttpFileTool(),
         WikipediaSearchTool(),
         DuckDuckGoSearchTool(),
     ]
     if vision_model_name:
-        tools.append(LLMVisionTool(model_name=vision_model_name))
+        tools.append(LLMVisionTool(model_name=vision_model_name,on_token=console_print_token if not no_stream else None))
     return Agent(
         model_name=model_name,
         tools=tools,
+        compact_every_n_iterations=compact_every_n_iteration,
+        max_tokens_working_memory=max_tokens_working_memory,
     )
-def create_orchestrator_agent(model_name: str, vision_model_name: str | None = None) -> Agent:
+def create_orchestrator_agent(
+    model_name: str,
+    vision_model_name: str | None = None,
+    no_stream: bool = False,
+    compact_every_n_iteration: int | None = None,
+    max_tokens_working_memory: int | None = None
+) -> Agent:
     """Create an agent with the specified model and tools.
     Args:
         model_name (str): Name of the model to use
         vision_model_name (str | None): Name of the vision model to use
+        no_stream (bool, optional): If True, the agent will not stream results.
+        compact_every_n_iteration (int | None, optional): Frequency of memory compaction.
+        max_tokens_working_memory (int | None, optional): Maximum tokens for working memory.
     Returns:
         Agent: An agent with the specified model and tools
@@ -160,14 +206,16 @@ def create_orchestrator_agent(model_name: str, vision_model_name: str | None = N
         ReadFileBlockTool(),
         RipgrepTool(),
         SearchDefinitionNames(),
-        LLMTool(model_name=MODEL_NAME),
+        LLMTool(model_name=model_name, on_token=console_print_token if not no_stream else None),
         AgentTool(agent=coding_agent_instance, agent_role="software expert", name="coder_agent_tool"),
     ]
     if vision_model_name:
-        tools.append(LLMVisionTool(model_name=vision_model_name))
+        tools.append(LLMVisionTool(model_name=vision_model_name, on_token=console_print_token if not no_stream else None))
     return Agent(
         model_name=model_name,
         tools=tools,
+        compact_every_n_iterations=compact_every_n_iteration,
+        max_tokens_working_memory=max_tokens_working_memory,
     )

quantalogic/coding_agent.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from quantalogic.agent import Agent
+from quantalogic.console_print_token import console_print_token
 from quantalogic.tools import (
     DuckDuckGoSearchTool,
     EditWholeContentTool,
@@ -19,13 +20,23 @@ from quantalogic.utils import get_coding_environment
 from quantalogic.utils.get_quantalogic_rules_content import get_quantalogic_rules_file_content
-def create_coding_agent(model_name: str, vision_model_name: str | None = None, basic: bool = False) -> Agent:
+def create_coding_agent(
+    model_name: str,
+    vision_model_name: str | None = None,
+    basic: bool = False,
+    no_stream: bool = False,
+    compact_every_n_iteration: int | None = None,
+    max_tokens_working_memory: int | None = None
+) -> Agent:
     """Creates and configures a coding agent with a comprehensive set of tools.
     Args:
         model_name (str): Name of the language model to use for the agent's core capabilities
         vision_model_name (str | None): Name of the vision model to use for the agent's core capabilities
         basic (bool, optional): If True, the agent will be configured with a basic set of tools.
+        no_stream (bool, optional): If True, the agent will not stream results.
+        compact_every_n_iteration (int | None, optional): Frequency of memory compaction.
+        max_tokens_working_memory (int | None, optional): Maximum tokens for working memory.
     Returns:
         Agent: A fully configured coding agent instance with:
@@ -64,7 +75,7 @@ def create_coding_agent(model_name: str, vision_model_name: str | None = None, b
     ]
     if vision_model_name:
-        tools.append(LLMVisionTool(model_name=vision_model_name))
+        tools.append(LLMVisionTool(model_name=vision_model_name, on_token=console_print_token if not no_stream else None))
     if not basic:
         tools.append(
@@ -72,6 +83,7 @@ def create_coding_agent(model_name: str, vision_model_name: str | None = None, b
                 model_name=model_name,
                 system_prompt="You are a software expert, your role is to answer coding questions.",
                 name="coding_consultant",  # Handles implementation-level coding questions
+                on_token=console_print_token if not no_stream else None,
             )
         )
         tools.append(
@@ -79,6 +91,7 @@ def create_coding_agent(model_name: str, vision_model_name: str | None = None, b
                 model_name=model_name,
                 system_prompt="You are a software architect, your role is to answer software architecture questions.",
                 name="software_architect",  # Handles system design and architecture questions
+                on_token=console_print_token if not no_stream else None,
             )
         )
@@ -87,4 +100,6 @@ def create_coding_agent(model_name: str, vision_model_name: str | None = None, b
         tools=tools,
         specific_expertise=specific_expertise,
         get_environment=get_coding_environment,
+        compact_every_n_iterations=compact_every_n_iteration,
+        max_tokens_working_memory=max_tokens_working_memory,
     )

quantalogic/{print_event.py → console_print_events.py} RENAMED Viewed

@@ -1,5 +1,3 @@
-"""Print events with rich formatting."""
 from typing import Any
 from rich import box
@@ -65,4 +63,4 @@ def console_print_events(event: str, data: dict[str, Any] | None = None):
         expand=True,
     )
-    console.print(panel)
+    console.print(panel)

quantalogic/console_print_token.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Print events with rich formatting."""
+from typing import Any
+from rich.console import Console
+def console_print_token(event: str, data: Any | None = None):
+    """Print a token with rich formatting.
+    Args:
+        event (str): The event name (e.g., 'stream_chunk')
+        data (Any | None): The token data to print
+    """
+    console = Console()
+    console.print(data, end="")

quantalogic/docs_cli.py ADDED Viewed

@@ -0,0 +1,50 @@
+import subprocess
+import os
+import sys
+def get_config_path():
+    """Get the absolute path to the mkdocs configuration file."""
+    return os.path.join(os.path.dirname(os.path.dirname(__file__)), 'mkdocs', 'mkdocs.yml')
+def serve_docs():
+    """Serve MkDocs documentation locally."""
+    config_path = get_config_path()
+    try:
+        subprocess.run(['mkdocs', 'serve', '--config-file', config_path], check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Error serving documentation: {e}")
+        sys.exit(1)
+def build_docs():
+    """Build MkDocs documentation."""
+    config_path = get_config_path()
+    try:
+        subprocess.run(['mkdocs', 'build', '--config-file', config_path], check=True)
+        print("Documentation built successfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"Error building documentation: {e}")
+        sys.exit(1)
+def deploy_docs():
+    """Deploy MkDocs documentation to GitHub Pages."""
+    config_path = get_config_path()
+    try:
+        subprocess.run(['mkdocs', 'gh-deploy', '--config-file', config_path], check=True)
+        print("Documentation deployed successfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"Error deploying documentation: {e}")
+        sys.exit(1)
+# Ensure the script can be run directly for testing
+if __name__ == '__main__':
+    command = sys.argv[1] if len(sys.argv) > 1 else None
+    if command == 'serve':
+        serve_docs()
+    elif command == 'build':
+        build_docs()
+    elif command == 'deploy':
+        deploy_docs()
+    else:
+        print("Usage: python docs_cli.py [serve|build|deploy]")
+        sys.exit(1)

quantalogic 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl

quantalogic 0.2.16__py3-none-any.whl → 0.2.18__py3-none-any.whl