PyPI - droidrun - Versions diffs - 0.3.9__py3-none-any.whl → 0.3.10.dev3__py3-none-any.whl - Mend

droidrun 0.3.9__py3-none-any.whl → 0.3.10.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

droidrun/__init__.py +2 -3
droidrun/__main__.py +1 -1
droidrun/agent/__init__.py +1 -1
droidrun/agent/codeact/__init__.py +1 -4
droidrun/agent/codeact/codeact_agent.py +66 -40
droidrun/agent/codeact/events.py +6 -3
droidrun/agent/codeact/prompts.py +2 -2
droidrun/agent/common/events.py +4 -2
droidrun/agent/context/__init__.py +1 -3
droidrun/agent/context/agent_persona.py +2 -1
droidrun/agent/context/context_injection_manager.py +6 -6
droidrun/agent/context/episodic_memory.py +5 -3
droidrun/agent/context/personas/__init__.py +3 -3
droidrun/agent/context/personas/app_starter.py +3 -3
droidrun/agent/context/personas/big_agent.py +3 -3
droidrun/agent/context/personas/default.py +3 -3
droidrun/agent/context/personas/ui_expert.py +5 -5
droidrun/agent/context/task_manager.py +15 -17
droidrun/agent/droid/__init__.py +1 -1
droidrun/agent/droid/droid_agent.py +327 -180
droidrun/agent/droid/events.py +91 -9
droidrun/agent/executor/__init__.py +13 -0
droidrun/agent/executor/events.py +24 -0
droidrun/agent/executor/executor_agent.py +327 -0
droidrun/agent/executor/prompts.py +136 -0
droidrun/agent/manager/__init__.py +18 -0
droidrun/agent/manager/events.py +20 -0
droidrun/agent/manager/manager_agent.py +459 -0
droidrun/agent/manager/prompts.py +223 -0
droidrun/agent/oneflows/app_starter_workflow.py +118 -0
droidrun/agent/oneflows/text_manipulator.py +204 -0
droidrun/agent/planner/__init__.py +3 -3
droidrun/agent/planner/events.py +6 -3
droidrun/agent/planner/planner_agent.py +27 -42
droidrun/agent/planner/prompts.py +2 -2
droidrun/agent/usage.py +11 -11
droidrun/agent/utils/__init__.py +11 -1
droidrun/agent/utils/async_utils.py +2 -1
droidrun/agent/utils/chat_utils.py +48 -60
droidrun/agent/utils/device_state_formatter.py +177 -0
droidrun/agent/utils/executer.py +12 -11
droidrun/agent/utils/inference.py +114 -0
droidrun/agent/utils/llm_picker.py +2 -0
droidrun/agent/utils/message_utils.py +85 -0
droidrun/agent/utils/tools.py +220 -0
droidrun/agent/utils/trajectory.py +8 -7
droidrun/cli/__init__.py +1 -1
droidrun/cli/logs.py +29 -28
droidrun/cli/main.py +279 -143
droidrun/config_manager/__init__.py +25 -0
droidrun/config_manager/config_manager.py +583 -0
droidrun/macro/__init__.py +2 -2
droidrun/macro/__main__.py +1 -1
droidrun/macro/cli.py +36 -34
droidrun/macro/replay.py +7 -9
droidrun/portal.py +1 -1
droidrun/telemetry/__init__.py +2 -2
droidrun/telemetry/events.py +3 -4
droidrun/telemetry/phoenix.py +173 -0
droidrun/telemetry/tracker.py +7 -5
droidrun/tools/__init__.py +1 -1
droidrun/tools/adb.py +210 -82
droidrun/tools/ios.py +7 -5
droidrun/tools/tools.py +25 -8
{droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/METADATA +5 -3
droidrun-0.3.10.dev3.dist-info/RECORD +70 -0
droidrun/agent/common/default.py +0 -5
droidrun/agent/context/reflection.py +0 -20
droidrun/agent/oneflows/reflector.py +0 -265
droidrun-0.3.9.dist-info/RECORD +0 -56
{droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/WHEEL +0 -0
{droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/entry_points.txt +0 -0
{droidrun-0.3.9.dist-info → droidrun-0.3.10.dev3.dist-info}/licenses/LICENSE +0 -0

droidrun/agent/utils/chat_utils.py CHANGED Viewed

@@ -1,16 +1,12 @@
-import base64
-import re
 import inspect
 import json
 import logging
-from typing import List, TYPE_CHECKING, Optional, Tuple
-from droidrun.agent.context import Reflection
+import re
+from typing import List, Optional, Tuple
 from llama_index.core.base.llms.types import ChatMessage, ImageBlock, TextBlock
-if TYPE_CHECKING:
-    from droidrun.tools import Tools
+from droidrun.telemetry.phoenix import clean_span
 logger = logging.getLogger("droidrun")
@@ -27,41 +23,23 @@ def message_copy(message: ChatMessage, deep = True) -> ChatMessage:
     return copied_message
-async def add_reflection_summary(reflection: Reflection, chat_history: List[ChatMessage]) -> List[ChatMessage]:
-    """Add reflection summary and advice to help the planner understand what went wrong and what to do differently."""
-    reflection_text = "\n### The last task failed. You have additional information about what happenend. \nThe Reflection from Previous Attempt:\n"
-    if reflection.summary:
-        reflection_text += f"**What happened:** {reflection.summary}\n\n"
-    if reflection.advice:
-        reflection_text += f"**Recommended approach for this retry:** {reflection.advice}\n"
-    reflection_block = TextBlock(text=reflection_text)
-    # Copy chat_history and append reflection block to the last message
-    chat_history = chat_history.copy()
-    chat_history[-1] = message_copy(chat_history[-1])
-    chat_history[-1].blocks.append(reflection_block)
-    return chat_history
 def _format_ui_elements(ui_data, level=0) -> str:
     """Format UI elements in natural language: index. className: resourceId, text - bounds"""
     if not ui_data:
         return ""
     formatted_lines = []
     indent = "  " * level  # Indentation for nested elements
     # Handle both list and single element
     elements = ui_data if isinstance(ui_data, list) else [ui_data]
     for element in elements:
         if not isinstance(element, dict):
             continue
         # Extract element properties
         index = element.get('index', '')
         class_name = element.get('className', '')
@@ -69,15 +47,15 @@ def _format_ui_elements(ui_data, level=0) -> str:
         text = element.get('text', '')
         bounds = element.get('bounds', '')
         children = element.get('children', [])
         # Format the line: index. className: resourceId, text - bounds
         line_parts = []
         if index != '':
             line_parts.append(f"{index}.")
         if class_name:
             line_parts.append(class_name + ":")
         details = []
         if resource_id:
             details.append(f'"{resource_id}"')
@@ -85,19 +63,19 @@ def _format_ui_elements(ui_data, level=0) -> str:
             details.append(f'"{text}"')
         if details:
             line_parts.append(", ".join(details))
         if bounds:
             line_parts.append(f"- ({bounds})")
         formatted_line = f"{indent}{' '.join(line_parts)}"
         formatted_lines.append(formatted_line)
         # Recursively format children with increased indentation
         if children:
             child_formatted = _format_ui_elements(children, level + 1)
             if child_formatted:
                 formatted_lines.append(child_formatted)
     return "\n".join(formatted_lines)
 async def add_ui_text_block(ui_state: str, chat_history: List[ChatMessage], copy = True) -> List[ChatMessage]:
@@ -111,7 +89,7 @@ async def add_ui_text_block(ui_state: str, chat_history: List[ChatMessage], copy
         except (json.JSONDecodeError, TypeError):
             # Fallback to original format if parsing fails
             ui_block = TextBlock(text="\nCurrent Clickable UI elements from the device using the custom TopViewService:\n```json\n" + json.dumps(ui_state) + "\n```\n")
         if copy:
             chat_history = chat_history.copy()
             chat_history[-1] = message_copy(chat_history[-1])
@@ -129,27 +107,27 @@ async def add_screenshot_image_block(screenshot, chat_history: List[ChatMessage]
 async def add_phone_state_block(phone_state, chat_history: List[ChatMessage]) -> List[ChatMessage]:
     # Format the phone state data nicely
     if isinstance(phone_state, dict) and 'error' not in phone_state:
         current_app = phone_state.get('currentApp', '')
         package_name = phone_state.get('packageName', 'Unknown')
         keyboard_visible = phone_state.get('keyboardVisible', False)
         focused_element = phone_state.get('focusedElement')
         # Format the focused element
         if focused_element:
             element_text = focused_element.get('text', '')
             element_class = focused_element.get('className', '')
             element_resource_id = focused_element.get('resourceId', '')
             # Build focused element description
             focused_desc = f"'{element_text}' {element_class}"
             if element_resource_id:
                 focused_desc += f" | ID: {element_resource_id}"
         else:
             focused_desc = "None"
         phone_state_text = f"""
 **Current Phone State:**
 • **App:** {current_app} ({package_name})
@@ -162,7 +140,7 @@ async def add_phone_state_block(phone_state, chat_history: List[ChatMessage]) ->
             phone_state_text = f"\n📱 **Phone State Error:** {phone_state.get('message', 'Unknown error')}\n"
         else:
             phone_state_text = f"\n📱 **Phone State:** {phone_state}\n"
     ui_block = TextBlock(text=phone_state_text)
     chat_history = chat_history.copy()
     chat_history[-1] = message_copy(chat_history[-1])
@@ -170,7 +148,7 @@ async def add_phone_state_block(phone_state, chat_history: List[ChatMessage]) ->
     return chat_history
 async def add_packages_block(packages, chat_history: List[ChatMessage]) -> List[ChatMessage]:
     ui_block = TextBlock(text=f"\nInstalled packages: {packages}\n```\n")
     chat_history = chat_history.copy()
     chat_history[-1] = message_copy(chat_history[-1])
@@ -181,7 +159,7 @@ async def add_memory_block(memory: List[str], chat_history: List[ChatMessage]) -
     memory_block = "\n### Remembered Information:\n"
     for idx, item in enumerate(memory, 1):
         memory_block += f"{idx}. {item}\n"
     for i, msg in enumerate(chat_history):
         if msg.role == "user":
             if isinstance(msg.content, str):
@@ -194,13 +172,6 @@ async def add_memory_block(memory: List[str], chat_history: List[ChatMessage]) -
             break
     return chat_history
-async def get_reflection_block(reflections: List[Reflection]) -> ChatMessage:
-    reflection_block = "\n### You also have additional Knowledge to help you guide your current task from previous expierences:\n"
-    for reflection in reflections:
-        reflection_block += f"**{reflection.advice}\n"
-    return ChatMessage(role="user", content=reflection_block)
 async def add_task_history_block(all_tasks: list[dict], chat_history: List[ChatMessage]) -> List[ChatMessage]:
     """Experimental task history with all previous tasks."""
     if not all_tasks:
@@ -212,8 +183,8 @@ async def add_task_history_block(all_tasks: list[dict], chat_history: List[ChatM
         status_value: str
         if hasattr(task, "description") and hasattr(task, "status"):
-            description = getattr(task, "description")
-            status_value = getattr(task, "status") or "unknown"
+            description = task.description
+            status_value = task.status or "unknown"
         elif isinstance(task, dict):
             description = str(task.get("description", task))
             status_value = str(task.get("status", "unknown"))
@@ -236,7 +207,7 @@ def parse_tool_descriptions(tool_list) -> str:
     """Parses the available tools and their descriptions for the system prompt."""
     logger.info("🛠️  Parsing tool descriptions...")
     tool_descriptions = []
     for tool in tool_list.values():
         assert callable(tool), f"Tool {tool} is not callable."
         tool_name = tool.__name__
@@ -253,11 +224,11 @@ def parse_tool_descriptions(tool_list) -> str:
 def parse_persona_description(personas) -> str:
     """Parses the available agent personas and their descriptions for the system prompt."""
     logger.debug("👥 Parsing agent persona descriptions for Planner Agent...")
     if not personas:
         logger.warning("No agent personas provided to Planner Agent")
         return "No specialized agents available."
     persona_descriptions = []
     for persona in personas:
         # Format each persona with name, description, and expertise areas
@@ -265,7 +236,7 @@ def parse_persona_description(personas) -> str:
         formatted_persona = f"- **{persona.name}**: {persona.description}\n  Expertise: {expertise_list}"
         persona_descriptions.append(formatted_persona)
         logger.debug(f"  - Parsed persona: {persona.name}")
     # Join all persona descriptions into a single string
     descriptions = "\n".join(persona_descriptions)
     logger.debug(f"👤 Found {len(persona_descriptions)} agent personas.")
@@ -308,4 +279,21 @@ def extract_code_and_thought(response_text: str) -> Tuple[Optional[str], str]:
     thought_preview = (thought_text[:100] + '...') if len(thought_text) > 100 else thought_text
     logger.debug(f"  - Extracted thought: {thought_preview}")
-    return extracted_code, thought_text
+    return extracted_code, thought_text
+def has_non_empty_content(msg):
+    """
+    Report whether a dict-style message carries any non-blank text.
+    Args:
+        msg: Message dict; its 'content' value is expected to be an iterable
+            of items, each either a dict with a 'text' entry or a plain string.
+    Returns:
+        True if at least one item holds text that is not empty after
+        stripping whitespace, otherwise False. Items without usable text
+        (e.g. dicts that only have other keys) do not count.
+    """
+    content = msg.get('content', [])
+    if not content:  # Missing, None, or empty content
+        return False
+    for item in content:
+        if isinstance(item, dict):
+            text = item.get('text', '')
+            # The 'text' key can be present with a None (or other non-string)
+            # value; the .get() default only covers a missing key, so guard
+            # the type before calling .strip().
+            if isinstance(text, str) and text.strip():
+                return True
+        elif isinstance(item, str) and item.strip():
+            return True
+    return False
+@clean_span("remove_empty_messages")
+def remove_empty_messages(messages):
+    """Return a new list with only the messages that has_non_empty_content() accepts, in their original order."""
+    kept = []
+    for candidate in messages:
+        if has_non_empty_content(candidate):
+            kept.append(candidate)
+    return kept

droidrun/agent/utils/device_state_formatter.py ADDED Viewed

@@ -0,0 +1,177 @@
+from typing import Any, Dict, List, Tuple
+def format_phone_state(phone_state: Dict[str, Any]) -> str:
+    """
+    Render a phone-state mapping as a short markdown-style text block.
+    Args:
+        phone_state: Mapping read for 'currentApp', 'packageName',
+            'focusedElement' and 'isEditable'. A mapping containing an
+            'error' key, or a value that is not a dict at all, is rendered
+            as a one-line fallback instead.
+    Returns:
+        The formatted text. The "Keyboard" line is derived from the
+        'isEditable' flag.
+    """
+    # Anything that is not a dict is echoed back verbatim.
+    if not isinstance(phone_state, dict):
+        return f"📱 **Phone State:** {phone_state}"
+    # Error payloads surface their 'message' field.
+    if 'error' in phone_state:
+        return f"📱 **Phone State Error:** {phone_state.get('message', 'Unknown error')}"
+    app_label = phone_state.get('currentApp', '')
+    package = phone_state.get('packageName', 'Unknown')
+    focused = phone_state.get('focusedElement')
+    editable = phone_state.get('isEditable', False)
+    # Only the focused element's text is shown; no element or no text gives ''.
+    focused_value = focused.get('text') if focused else None
+    focused_desc = f"'{focused_value}'" if focused_value else "''"
+    keyboard = 'Visible' if editable else 'Hidden'
+    return (
+        "**Current Phone State:**\n"
+        f"• **App:** {app_label} ({package})\n"
+        f"• **Keyboard:** {keyboard}\n"
+        f"• **Focused Element:** {focused_desc}"
+    )
+def format_ui_elements(ui_data: List[Dict[str, Any]], level: int = 0) -> str:
+    """
+    Render a (possibly nested) UI element tree, one element per line, as:
+        index. className: "resourceId", "text" - (bounds)
+    Parts whose value is empty are left out of the line.
+    Args:
+        ui_data: A list of element dicts, or a single element dict.
+            Entries that are not dicts are ignored.
+        level: Nesting depth; each level indents the line by two spaces.
+    Returns:
+        The lines joined with newlines, or "" when there is nothing to format.
+    """
+    if not ui_data:
+        return ""
+    nodes = ui_data if isinstance(ui_data, list) else [ui_data]
+    prefix = "  " * level
+    output = []
+    for node in nodes:
+        if not isinstance(node, dict):
+            continue
+        idx = node.get('index', '')
+        cls = node.get('className', '')
+        box = node.get('bounds', '')
+        # Quoted details: resource id first, then text, skipping empty values.
+        quoted = [
+            f'"{value}"'
+            for value in (node.get('resourceId', ''), node.get('text', ''))
+            if value
+        ]
+        pieces = []
+        if idx != '':  # index 0 is a valid index, so compare against '' explicitly
+            pieces.append(f"{idx}.")
+        if cls:
+            pieces.append(cls + ":")
+        if quoted:
+            pieces.append(", ".join(quoted))
+        if box:
+            pieces.append(f"- ({box})")
+        output.append(prefix + ' '.join(pieces))
+        # Children are rendered one level deeper; an empty result adds no line.
+        nested = format_ui_elements(node.get('children', []), level + 1)
+        if nested:
+            output.append(nested)
+    return "\n".join(output)
+def get_device_state_exact_format(state: Dict[str, Any]) -> Tuple[str, str]:
+    """
+    Build the combined device-state text: the phone-state summary followed by
+    the clickable UI element list.
+    Output layout:
+    **Current Phone State:**
+    • **App:** App Name (package.name)
+    • **Keyboard:** Hidden/Visible
+    • **Focused Element:** 'text'
+    Current Clickable UI elements from the device in the schema 'index. className: resourceId, text - bounds(x1,y1,x2,y2)':
+    1. ClassName: "resourceId", "text" - (x1, y1, x2, y2)
+    Args:
+        state: Device-state dict; read for the keys "error", "message",
+            "phone_state" and "a11y_tree".
+    Returns:
+        Tuple of (formatted_string, focused_text). focused_text is the text
+        of the focused element and is always a string: "" when there is no
+        focused element or its text is missing or None. Any failure while
+        formatting is reported in formatted_string instead of being raised.
+    """
+    ui_header = "Current Clickable UI elements from the device in the schema 'index. className: resourceId, text - bounds(x1,y1,x2,y2)':"
+    try:
+        if "error" in state:
+            return (f"Error getting device state: {state.get('message', 'Unknown error')}", "")
+        phone_state = state.get("phone_state", {})
+        focused_element = phone_state.get('focusedElement')
+        # The 'text' key may be present with a None value; normalise it so the
+        # second tuple item honours the documented "empty string if none".
+        focused_text = (focused_element.get('text') or "") if focused_element else ""
+        phone_state_text = format_phone_state(phone_state)
+        ui_data = state.get("a11y_tree", [])
+        ui_body = format_ui_elements(ui_data) if ui_data else "No UI elements found"
+        ui_elements_text = f"{ui_header}\n{ui_body}"
+        # NOTE(review): the whitespace-only line between the two sections is
+        # kept exactly as it was; confirm whether downstream prompts rely on it.
+        formatted_string = f"{phone_state_text}\n        \n\n{ui_elements_text}"
+        return (formatted_string, focused_text)
+    except Exception as e:
+        # Deliberate best-effort: callers always get a (str, str) tuple back.
+        return (f"Error getting device state: {e}", "")
+def main():
+    """Format a hard-coded sample device state and print the result (manual smoke check)."""
+    sample_phone = {
+        "currentApp": "Settings",
+        "packageName": "com.android.settings",
+        "isEditable": False,
+        "focusedElement": {"text": "Search settings"},
+    }
+    sample_element = {
+        "index": 1,
+        "className": "android.widget.TextView",
+        "resourceId": "com.android.settings:id/title",
+        "text": "Wi‑Fi",
+        "bounds": "100,200,300,250",
+    }
+    rendered, focus = get_device_state_exact_format(
+        {"phone_state": sample_phone, "a11y_tree": [sample_element]}
+    )
+    print("Formatted String:")
+    print(rendered)
+    print(f"\nFocused Text: '{focus}'")
+if __name__ == "__main__":
+    main()

droidrun/agent/utils/executer.py CHANGED Viewed

@@ -1,14 +1,15 @@
-import io
+import asyncio
 import contextlib
-import ast
-import traceback
+import io
 import logging
+import threading
+import traceback
+from asyncio import AbstractEventLoop
 from typing import Any, Dict
-from droidrun.agent.utils.async_utils import async_to_sync
 from llama_index.core.workflow import Context
-import asyncio
-from asyncio import AbstractEventLoop
-import threading
+from droidrun.agent.utils.async_utils import async_to_sync
 from droidrun.tools.adb import AdbTools
 logger = logging.getLogger("droidrun")
@@ -27,9 +28,9 @@ class SimpleCodeExecutor:
     def __init__(
         self,
         loop: AbstractEventLoop,
-        locals: Dict[str, Any] = {},
-        globals: Dict[str, Any] = {},
-        tools={},
+        locals: Dict[str, Any] = {},  # noqa: B006
+        globals: Dict[str, Any] = {},  # noqa: B006
+        tools={},  # noqa: B006
         tools_instance=None,
         use_same_scope: bool = True,
     ):
@@ -101,7 +102,7 @@ class SimpleCodeExecutor:
         self.globals['ui_state'] = await ctx.store.get("ui_state", None)
         self.globals['step_screenshots'] = []
         self.globals['step_ui_states'] = []
         if self.tools_instance and isinstance(self.tools_instance, AdbTools):
             self.tools_instance._set_context(ctx)

droidrun/agent/utils/inference.py ADDED Viewed

@@ -0,0 +1,114 @@
+import contextvars
+import threading
+import time
+from concurrent.futures import TimeoutError as FuturesTimeoutError
+import asyncio
+from typing import Any, Optional
+def call_with_retries(llm, messages, retries=3, timeout=500, delay=1.0):
+    """
+    Call ``llm.chat`` synchronously with a per-attempt timeout and retries.
+    Each attempt runs ``llm.chat(messages=messages)`` on a daemon thread,
+    inside a copy of the caller's contextvars context, and waits at most
+    ``timeout`` seconds for it to finish.
+    Args:
+        llm: Object exposing ``chat(messages=...)``.
+        messages: Messages passed through to ``llm.chat``.
+        retries: Maximum number of attempts.
+        timeout: Seconds to wait for each attempt.
+        delay: Base pause between attempts; the pause is ``delay * attempt``.
+    Returns:
+        The first response whose ``message.content`` is truthy.
+    Raises:
+        TimeoutError: If the last attempt timed out.
+        ValueError: If the last attempt returned empty content, or if no
+            attempt was made at all.
+        Exception: Whatever ``llm.chat`` raised on the last attempt.
+    """
+    last_exception = None
+    for attempt in range(1, retries + 1):
+        # Fresh context copy and result slots for every attempt.
+        ctx = contextvars.copy_context()
+        result_holder = {}
+        error_holder = {}
+        def _target():
+            # Runs on the worker thread; the outcome is handed back via the holders.
+            try:
+                result_holder["response"] = ctx.run(llm.chat, messages=messages)  # noqa: B023
+            except Exception as e:
+                error_holder["error"] = e  # noqa: B023
+        worker = threading.Thread(target=_target, daemon=True)
+        worker.start()
+        worker.join(timeout)
+        if worker.is_alive():
+            # The join timed out. The worker is not stopped here, so the
+            # underlying llm.chat call may still be running in the background.
+            print(f"Attempt {attempt} timed out after {timeout} seconds")
+            # Do not join; thread is daemon and won't block process exit
+            last_exception = TimeoutError("Timed out")
+        else:
+            if "error" in error_holder:
+                err = error_holder["error"]
+                # Normalize FuturesTimeoutError if raised inside llm.chat
+                if isinstance(err, FuturesTimeoutError):
+                    print(f"Attempt {attempt} timed out inside LLM after {timeout} seconds")
+                    last_exception = TimeoutError("Timed out")
+                else:
+                    print(f"Attempt {attempt} failed with error: {err!r}")
+                    last_exception = err
+            else:
+                response = result_holder.get("response")
+                # A response only counts when it has a message with truthy content.
+                if (
+                    response is not None
+                    and getattr(response, "message", None) is not None
+                    and getattr(response.message, "content", None)
+                ):
+                    return response
+                else:
+                    print(f"Attempt {attempt} returned empty content")
+                    last_exception = ValueError("Empty response content")
+        # Linearly growing pause, skipped after the final attempt.
+        if attempt < retries:
+            time.sleep(delay * attempt)
+    if last_exception:
+        raise last_exception
+    # Reached only when the loop body never ran (retries < 1).
+    raise ValueError("All attempts returned empty response content")
+async def acall_with_retries(
+    llm,
+    messages: list,
+    retries: int = 3,
+    timeout: float = 500,
+    delay: float = 1.0
+) -> Any:
+    """
+    Await ``llm.achat`` with a per-attempt timeout, retrying on errors,
+    timeouts and empty responses.
+    Args:
+        llm: Object exposing an awaitable ``achat(messages=...)``.
+        messages: Messages passed through to ``llm.achat``.
+        retries: Maximum number of attempts.
+        timeout: Seconds allowed for each attempt.
+        delay: Base pause between attempts; the pause is ``delay * attempt``.
+    Returns:
+        The first response whose ``message.content`` is truthy.
+    Raises:
+        TimeoutError: If the last attempt timed out.
+        ValueError: If the last attempt returned empty content, or if no
+            attempt was made at all.
+        Exception: Whatever the last attempt raised.
+    """
+    failure: Optional[Exception] = None
+    for attempt in range(1, retries + 1):
+        try:
+            pending = llm.achat(messages=messages)
+            reply = await asyncio.wait_for(pending, timeout=timeout)
+            # Accept the reply only if it carries a message with truthy content.
+            has_message = reply is not None and getattr(reply, "message", None) is not None
+            if has_message and getattr(reply.message, "content", None):
+                return reply
+            print(f"Attempt {attempt} returned empty content")
+            failure = ValueError("Empty response content")
+        except asyncio.TimeoutError:
+            print(f"Attempt {attempt} timed out after {timeout} seconds")
+            failure = TimeoutError("Timed out")
+        except Exception as exc:
+            print(f"Attempt {attempt} failed with error: {exc!r}")
+            failure = exc
+        # Linearly growing pause, skipped after the final attempt.
+        if attempt < retries:
+            await asyncio.sleep(delay * attempt)
+    if failure:
+        raise failure
+    raise ValueError("All attempts returned empty response content")

droidrun/agent/utils/llm_picker.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import importlib
 import logging
 from typing import Any
 from llama_index.core.llms.llm import LLM
 from droidrun.agent.usage import track_usage
 # Configure logging

droidrun/agent/utils/message_utils.py ADDED Viewed

@@ -0,0 +1,85 @@
+"""
+Message conversion utilities for Manager Agent.
+Converts between dict message format and llama-index ChatMessage format.
+"""
+from io import BytesIO
+from pathlib import Path
+from typing import Union
+from llama_index.core.llms import ChatMessage, ImageBlock, TextBlock
+from PIL import Image
+def image_to_image_bytes(image_source: Union[str, Path, Image.Image, bytes]) -> bytes:
+    """
+    Convert an image to bytes suitable for an ImageBlock.
+    Args:
+        image_source: Can be:
+            - str/Path: path to an image file
+            - PIL.Image.Image: PIL Image object
+            - bytes: already-encoded image bytes
+    Returns:
+        PNG-encoded bytes for path and PIL inputs. A ``bytes`` input is
+        returned unchanged (it is not re-encoded).
+    Raises:
+        ValueError: If image_source is none of the supported types.
+    """
+    if isinstance(image_source, bytes):
+        return image_source
+    buffer = BytesIO()
+    if isinstance(image_source, (str, Path)):
+        # Image.open() keeps the underlying file open; the context manager
+        # closes it once the PNG has been written to the buffer.
+        with Image.open(image_source) as image:
+            image.save(buffer, format="PNG")
+    elif isinstance(image_source, Image.Image):
+        # A caller-owned image is left open for the caller to manage.
+        image_source.save(buffer, format="PNG")
+    else:
+        raise ValueError(f"Unsupported image source type: {type(image_source)}")
+    return buffer.getvalue()
+def convert_messages_to_chatmessages(messages: list[dict]) -> list[ChatMessage]:
+    """
+    Translate dict-style messages into llama-index ChatMessage objects.
+    Input shape, per message:
+        {
+            "role": "user" | "assistant" | "system",
+            "content": [
+                {"text": "some text"},
+                {"image": "/path/to/image.png"}  # or PIL Image
+            ]
+        }
+    Output shape, per message:
+        ChatMessage(
+            role="user",
+            blocks=[
+                TextBlock(text="some text"),
+                ImageBlock(image=b"...bytes...")
+            ]
+        )
+    A content item with a 'text' key becomes a TextBlock; otherwise an item
+    with an 'image' key becomes an ImageBlock; items with neither are dropped.
+    Args:
+        messages: List of message dicts
+    Returns:
+        List of ChatMessage objects, in the same order as the input
+    """
+    converted = []
+    for entry in messages:
+        parts = []
+        for piece in entry['content']:
+            if 'text' in piece:
+                parts.append(TextBlock(text=piece['text']))
+                continue
+            if 'image' in piece:
+                # Images are normalised to raw bytes before being wrapped.
+                parts.append(ImageBlock(image=image_to_image_bytes(piece['image'])))
+        converted.append(ChatMessage(role=entry['role'], blocks=parts))
+    return converted

droidrun 0.3.9__py3-none-any.whl → 0.3.10.dev3__py3-none-any.whl

droidrun 0.3.9__py3-none-any.whl → 0.3.10.dev3__py3-none-any.whl