PyPI - praisonaiagents - Versions diffs - 0.0.12__tar.gz → 0.0.13__tar.gz - Mend

praisonaiagents 0.0.12.tar.gz → 0.0.13.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

{praisonaiagents-0.0.12 → praisonaiagents-0.0.13}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: praisonaiagents
-Version: 0.0.12
+Version: 0.0.13
 Summary: Praison AI agents for completing complex tasks with Self Reflection Agents
 Author: Mervin Praison
 Requires-Dist: pydantic

{praisonaiagents-0.0.12 → praisonaiagents-0.0.13}/praisonaiagents/agent/agent.py RENAMED Viewed

@@ -140,7 +140,7 @@ class Agent:
         max_rpm: Optional[int] = None,
         max_execution_time: Optional[int] = None,
         memory: bool = True,
-        verbose: bool = False,
+        verbose: bool = True,
         allow_delegation: bool = False,
         step_callback: Optional[Any] = None,
         cache: bool = True,
@@ -191,6 +191,8 @@ class Agent:
         self.max_reflect = max_reflect
         self.min_reflect = min_reflect
         self.reflect_llm = reflect_llm
+        self.console = Console()  # Create a single console instance for the agent
     def execute_tool(self, function_name, arguments):
         """
         Execute a tool dynamically based on the function name and arguments.
@@ -235,7 +237,6 @@ class Agent:
         return f"Agent(name='{self.name}', role='{self.role}', goal='{self.goal}')"
     def _chat_completion(self, messages, temperature=0.2, tools=None, stream=True):
-        console = Console()
         start_time = time.time()
         logging.debug(f"{self.name} sending messages to LLM: {messages}")
@@ -305,12 +306,24 @@ class Agent:
                     stream=True
                 )
                 full_response_text = ""
-                with Live(display_generating("", start_time), refresh_per_second=4) as live:
+                # Create Live display with proper configuration
+                with Live(
+                    display_generating("", start_time),
+                    console=self.console,
+                    refresh_per_second=4,
+                    transient=False,  # Changed to False to preserve output
+                    vertical_overflow="ellipsis",
+                    auto_refresh=True
+                ) as live:
                     for chunk in response_stream:
                         if chunk.choices[0].delta.content:
                             full_response_text += chunk.choices[0].delta.content
                             live.update(display_generating(full_response_text, start_time))
+                # Clear the last generating display with a blank line
+                self.console.print()
                 final_response = client.chat.completions.create(
                     model=self.llm,
                     messages=messages,
@@ -347,7 +360,11 @@ Your Goal: {self.goal}
         if system_prompt:
             messages.append({"role": "system", "content": system_prompt})
         messages.extend(self.chat_history)
-        messages.append({"role": "user", "content": prompt})
+        if isinstance(prompt, list):
+            # If we receive a multimodal prompt list, place it directly in the user message
+            messages.append({"role": "user", "content": prompt})
+        else:
+            messages.append({"role": "user", "content": prompt})
         final_response_text = None
         reflection_count = 0
@@ -356,7 +373,14 @@ Your Goal: {self.goal}
         while True:
             try:
                 if self.verbose:
-                    display_instruction(f"Agent {self.name} is processing prompt: {prompt}")
+                    # Handle both string and list prompts for instruction display
+                    display_text = prompt
+                    if isinstance(prompt, list):
+                        # Extract text content from multimodal prompt
+                        display_text = next((item["text"] for item in prompt if item["type"] == "text"), "")
+                    if display_text and str(display_text).strip():
+                        display_instruction(f"Agent {self.name} is processing prompt: {display_text}", console=self.console)
                 response = self._chat_completion(messages, temperature=temperature, tools=tools if tools else None)
                 if not response:
@@ -376,13 +400,13 @@ Your Goal: {self.goal}
                         arguments = json.loads(tool_call.function.arguments)
                         if self.verbose:
-                            display_tool_call(f"Agent {self.name} is calling function '{function_name}' with arguments: {arguments}")
+                            display_tool_call(f"Agent {self.name} is calling function '{function_name}' with arguments: {arguments}", console=self.console)
                         tool_result = self.execute_tool(function_name, arguments)
                         if tool_result:
                             if self.verbose:
-                                display_tool_call(f"Function '{function_name}' returned: {tool_result}")
+                                display_tool_call(f"Function '{function_name}' returned: {tool_result}", console=self.console)
                             messages.append({
                                 "role": "tool",
                                 "tool_call_id": tool_call.id,
@@ -407,7 +431,7 @@ Your Goal: {self.goal}
                     self.chat_history.append({"role": "assistant", "content": response_text})
                     if self.verbose:
                         logging.info(f"Agent {self.name} final response: {response_text}")
-                    display_interaction(prompt, response_text, markdown=self.markdown, generation_time=time.time() - start_time)
+                    display_interaction(prompt, response_text, markdown=self.markdown, generation_time=time.time() - start_time, console=self.console)
                     return response_text
                 reflection_prompt = f"""
@@ -430,26 +454,26 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                     reflection_output = reflection_response.choices[0].message.parsed
                     if self.verbose:
-                        display_self_reflection(f"Agent {self.name} self reflection (using {self.reflect_llm if self.reflect_llm else self.llm}): reflection='{reflection_output.reflection}' satisfactory='{reflection_output.satisfactory}'")
+                        display_self_reflection(f"Agent {self.name} self reflection (using {self.reflect_llm if self.reflect_llm else self.llm}): reflection='{reflection_output.reflection}' satisfactory='{reflection_output.satisfactory}'", console=self.console)
                     messages.append({"role": "assistant", "content": f"Self Reflection: {reflection_output.reflection} Satisfactory?: {reflection_output.satisfactory}"})
                     # Only consider satisfactory after minimum reflections
                     if reflection_output.satisfactory == "yes" and reflection_count >= self.min_reflect - 1:
                         if self.verbose:
-                            display_self_reflection("Agent marked the response as satisfactory after meeting minimum reflections")
+                            display_self_reflection("Agent marked the response as satisfactory after meeting minimum reflections", console=self.console)
                         self.chat_history.append({"role": "user", "content": prompt})
                         self.chat_history.append({"role": "assistant", "content": response_text})
-                        display_interaction(prompt, response_text, markdown=self.markdown, generation_time=time.time() - start_time)
+                        display_interaction(prompt, response_text, markdown=self.markdown, generation_time=time.time() - start_time, console=self.console)
                         return response_text
                     # Check if we've hit max reflections
                     if reflection_count >= self.max_reflect - 1:
                         if self.verbose:
-                            display_self_reflection("Maximum reflection count reached, returning current response")
+                            display_self_reflection("Maximum reflection count reached, returning current response", console=self.console)
                         self.chat_history.append({"role": "user", "content": prompt})
                         self.chat_history.append({"role": "assistant", "content": response_text})
-                        display_interaction(prompt, response_text, markdown=self.markdown, generation_time=time.time() - start_time)
+                        display_interaction(prompt, response_text, markdown=self.markdown, generation_time=time.time() - start_time, console=self.console)
                         return response_text
                     logging.debug(f"{self.name} reflection count {reflection_count + 1}, continuing reflection process")
@@ -460,12 +484,12 @@ Output MUST be JSON with 'reflection' and 'satisfactory'.
                     continue  # Continue the loop for more reflections
                 except Exception as e:
-                    display_error(f"Error in parsing self-reflection json {e}. Retrying")
+                    display_error(f"Error in parsing self-reflection json {e}. Retrying", console=self.console)
                     logging.error("Reflection parsing failed.", exc_info=True)
                     messages.append({"role": "assistant", "content": f"Self Reflection failed."})
                     reflection_count += 1
                     continue  # Continue even after error to try again
             except Exception as e:
-                display_error(f"Error in chat: {e}")
+                display_error(f"Error in chat: {e}", console=self.console)
                 return None

{praisonaiagents-0.0.12 → praisonaiagents-0.0.13}/praisonaiagents/agents/agents.py RENAMED Viewed

@@ -11,6 +11,33 @@ from ..main import display_error, TaskOutput, error_logs, client
 from ..agent.agent import Agent
 from ..task.task import Task
+def encode_file_to_base64(file_path: str) -> str:
+    """Base64-encode a file."""
+    import base64
+    with open(file_path, "rb") as f:
+        return base64.b64encode(f.read()).decode("utf-8")
+def process_video(video_path: str, seconds_per_frame=2):
+    """Split video into frames (base64-encoded)."""
+    import cv2
+    import base64
+    base64_frames = []
+    video = cv2.VideoCapture(video_path)
+    total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = video.get(cv2.CAP_PROP_FPS)
+    frames_to_skip = int(fps * seconds_per_frame)
+    curr_frame = 0
+    while curr_frame < total_frames:
+        video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
+        success, frame = video.read()
+        if not success:
+            break
+        _, buffer = cv2.imencode(".jpg", frame)
+        base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
+        curr_frame += frames_to_skip
+    video.release()
+    return base64_frames
 class PraisonAIAgents:
     def __init__(self, agents, tasks, verbose=0, completion_checker=None, max_retries=5, process="sequential", manager_llm=None):
         self.agents = agents
@@ -58,6 +85,19 @@ class PraisonAIAgents:
             display_error(f"Error: Task with ID {task_id} does not exist")
             return
         task = self.tasks[task_id]
+        # Only import multimodal dependencies if task has images
+        if task.images and task.status == "not started":
+            try:
+                import cv2
+                import base64
+                from moviepy import VideoFileClip
+            except ImportError as e:
+                display_error(f"Error: Missing required dependencies for image/video processing: {e}")
+                display_error("Please install with: pip install opencv-python moviepy")
+                task.status = "failed"
+                return None
         if task.status == "not started":
             task.status = "in progress"
@@ -83,7 +123,47 @@ Expected Output: {task.expected_output}.
         if self.verbose >= 2:
             logging.info(f"Executing task {task_id}: {task.description} using {executor_agent.name}")
         logging.debug(f"Starting execution of task {task_id} with prompt:\n{task_prompt}")
-        agent_output = executor_agent.chat(task_prompt, tools=task.tools)
+        if task.images:
+            def _get_multimodal_message(text_prompt, images):
+                content = [{"type": "text", "text": text_prompt}]
+                for img in images:
+                    # If local file path for a valid image
+                    if os.path.exists(img):
+                        ext = os.path.splitext(img)[1].lower()
+                        # If it's a .mp4, convert to frames
+                        if ext == ".mp4":
+                            frames = process_video(img, seconds_per_frame=1)
+                            content.append({"type": "text", "text": "These are frames from the video."})
+                            for f in frames:
+                                content.append({
+                                    "type": "image_url",
+                                    "image_url": {"url": f"data:image/jpg;base64,{f}"}
+                                })
+                        else:
+                            encoded = encode_file_to_base64(img)
+                            content.append({
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/{ext.lstrip('.')};base64,{encoded}"
+                                }
+                            })
+                    else:
+                        # Treat as a remote URL
+                        content.append({
+                            "type": "image_url",
+                            "image_url": {"url": img}
+                        })
+                return content
+            agent_output = executor_agent.chat(
+                _get_multimodal_message(task_prompt, task.images),
+                tools=task.tools
+            )
+        else:
+            agent_output = executor_agent.chat(task_prompt, tools=task.tools)
         if agent_output:
             task_output = TaskOutput(
                 description=task.description,

{praisonaiagents-0.0.12 → praisonaiagents-0.0.13}/praisonaiagents/main.py RENAMED Viewed

@@ -25,43 +25,92 @@ logging.basicConfig(
 # Global list to store error logs
 error_logs = []
-def display_interaction(message: str, response: str, markdown: bool = True, generation_time: Optional[float] = None):
-    console = Console()
-    if generation_time is not None:
+def _clean_display_content(content: str, max_length: int = 20000) -> str:
+    """Helper function to clean and truncate content for display."""
+    if not content or not str(content).strip():
+        return ""
+    content = str(content)
+    # Handle base64 content
+    if "base64" in content:
+        content_parts = []
+        for line in content.split('\n'):
+            if "base64" not in line:
+                content_parts.append(line)
+        content = '\n'.join(content_parts)
+    # Truncate if too long
+    if len(content) > max_length:
+        content = content[:max_length] + "..."
+    return content.strip()
+def display_interaction(message, response, markdown=True, generation_time=None, console=None):
+    """Display the interaction between user and assistant."""
+    if console is None:
+        console = Console()
+    if generation_time:
         console.print(Text(f"Response generated in {generation_time:.1f}s", style="dim"))
-    else:
-        console.print(Text("Response Generation Complete", style="dim"))
+    # Handle multimodal content (list)
+    if isinstance(message, list):
+        # Extract just the text content from the multimodal message
+        text_content = next((item["text"] for item in message if item["type"] == "text"), "")
+        message = text_content
+    message = _clean_display_content(str(message))
+    response = _clean_display_content(str(response))
     if markdown:
         console.print(Panel.fit(Markdown(message), title="Message", border_style="cyan"))
         console.print(Panel.fit(Markdown(response), title="Response", border_style="cyan"))
     else:
         console.print(Panel.fit(Text(message, style="bold green"), title="Message", border_style="cyan"))
-        console.print(Panel.fit(Text(response, style="bold white"), title="Response", border_style="cyan"))
-def display_self_reflection(message: str):
-    console = Console()
+        console.print(Panel.fit(Text(response, style="bold blue"), title="Response", border_style="cyan"))
+def display_self_reflection(message: str, console=None):
+    if not message or not message.strip():
+        return
+    if console is None:
+        console = Console()
+    message = _clean_display_content(str(message))
     console.print(Panel.fit(Text(message, style="bold yellow"), title="Self Reflection", border_style="magenta"))
-def display_instruction(message: str):
-    console = Console()
+def display_instruction(message: str, console=None):
+    if not message or not message.strip():
+        return
+    if console is None:
+        console = Console()
+    message = _clean_display_content(str(message))
     console.print(Panel.fit(Text(message, style="bold blue"), title="Instruction", border_style="cyan"))
-def display_tool_call(message: str):
-    console = Console()
+def display_tool_call(message: str, console=None):
+    if not message or not message.strip():
+        return
+    if console is None:
+        console = Console()
+    message = _clean_display_content(str(message))
     console.print(Panel.fit(Text(message, style="bold cyan"), title="Tool Call", border_style="green"))
-def display_error(message: str):
-    console = Console()
+def display_error(message: str, console=None):
+    if not message or not message.strip():
+        return
+    if console is None:
+        console = Console()
+    message = _clean_display_content(str(message))
     console.print(Panel.fit(Text(message, style="bold red"), title="Error", border_style="red"))
     # Store errors
     error_logs.append(message)
 def display_generating(content: str = "", start_time: Optional[float] = None):
+    if not content or not str(content).strip():
+        return Panel("", title="", border_style="green")  # Return empty panel when no content
     elapsed_str = ""
     if start_time is not None:
         elapsed = time.time() - start_time
         elapsed_str = f" {elapsed:.1f}s"
+    content = _clean_display_content(str(content))
     return Panel(Markdown(content), title=f"Generating...{elapsed_str}", border_style="green")
 def clean_triple_backticks(text: str) -> str:

{praisonaiagents-0.0.12 → praisonaiagents-0.0.13}/praisonaiagents/task/task.py RENAMED Viewed

@@ -22,7 +22,8 @@ class Task:
         status: str = "not started",
         result: Optional[TaskOutput] = None,
         create_directory: Optional[bool] = False,
-        id: Optional[int] = None
+        id: Optional[int] = None,
+        images: Optional[List[str]] = None
     ):
         self.description = description
         self.expected_output = expected_output
@@ -40,6 +41,7 @@ class Task:
         self.result = result
         self.create_directory = create_directory
         self.id = id
+        self.images = images if images else []
         if self.output_json and self.output_pydantic:
             raise ValueError("Only one output type can be defined")

{praisonaiagents-0.0.12 → praisonaiagents-0.0.13}/praisonaiagents.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: praisonaiagents
-Version: 0.0.12
+Version: 0.0.13
 Summary: Praison AI agents for completing complex tasks with Self Reflection Agents
 Author: Mervin Praison
 Requires-Dist: pydantic

{praisonaiagents-0.0.12 → praisonaiagents-0.0.13}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "praisonaiagents"
-version = "0.0.12"
+version = "0.0.13"
 description = "Praison AI agents for completing complex tasks with Self Reflection Agents"
 authors = [
     { name="Mervin Praison" }