llm-dialog-manager 0.4.3__tar.gz → 0.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17)
  1. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/PKG-INFO +1 -1
  2. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/__init__.py +1 -1
  3. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/agent.py +110 -28
  4. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/PKG-INFO +1 -1
  5. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/pyproject.toml +1 -1
  6. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/LICENSE +0 -0
  7. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/README.md +0 -0
  8. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/chat_history.py +0 -0
  9. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/key_manager.py +0 -0
  10. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/SOURCES.txt +0 -0
  11. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/dependency_links.txt +0 -0
  12. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/requires.txt +0 -0
  13. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/top_level.txt +0 -0
  14. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/setup.cfg +0 -0
  15. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/tests/test_agent.py +0 -0
  16. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/tests/test_chat_history.py +0 -0
  17. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/tests/test_key_manager.py +0 -0
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: llm_dialog_manager
-Version: 0.4.3
+Version: 0.4.5
 Summary: A Python package for managing LLM chat conversation history
 Author-email: xihajun <work@2333.fun>
 License: MIT
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/__init__.py
@@ -1,4 +1,4 @@
 from .chat_history import ChatHistory
 from .agent import Agent
 
-__version__ = "0.4.3"
+__version__ = "0.4.5"
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/agent.py
@@ -2,7 +2,7 @@
 import json
 import os
 import uuid
-from typing import List, Dict, Optional, Union
+from typing import List, Dict, Union, Optional, Any
 import logging
 from pathlib import Path
 import random
@@ -38,6 +38,10 @@ def load_env_vars():
 
 load_env_vars()
 
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+
 def format_messages_for_gemini(messages):
     """
     Convert the standardized message format into the Gemini format.
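The new encode_image helper returns the raw base64 string for a local file; the OpenAI-style branches further below wrap that string in a data: URL. A minimal sketch of how the two combine, with an illustrative file path and the JPEG media type used elsewhere in this diff:

import base64

def encode_image(image_path):
    # Read the file and return its contents as a base64-encoded string.
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

# Illustrative usage: build the data URL shape the OpenAI-style code expects.
image_url = f"data:image/jpeg;base64,{encode_image('example.png')}"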
@@ -93,13 +97,30 @@ def completion(model: str, messages: List[Dict[str, Union[str, List[Union[str, I
     api_key = os.getenv(f"{service.upper()}_API_KEY")
     base_url = os.getenv(f"{service.upper()}_BASE_URL")
 
-    def format_messages_for_api(model, messages):
-        """Convert ChatHistory messages to the format required by the specific API."""
+    def format_messages_for_api(
+        model: str,
+        messages: List[Dict[str, Union[str, List[Union[str, Image.Image, Dict]]]]]
+    ) -> tuple[Optional[str], List[Dict[str, Any]]]:
+        """
+        Convert ChatHistory messages to the format required by the specific API.
+
+        Args:
+            model: The model name (e.g., "claude", "gemini", "gpt")
+            messages: List of message dictionaries with role and content
+
+        Returns:
+            tuple: (system_message, formatted_messages)
+                - system_message is extracted system message for Claude, None for others
+                - formatted_messages is the list of formatted message dictionaries
+        """
         if "claude" in model and "openai" not in model:
             formatted = []
             system_msg = ""
+
+            # Extract system message if present
             if messages and messages[0]["role"] == "system":
                 system_msg = messages.pop(0)["content"]
+
             for msg in messages:
                 content = msg["content"]
                 if isinstance(content, str):
@@ -109,9 +130,12 @@ def completion(model: str, messages: List[Dict[str, Union[str, List[Union[str, I
                     combined_content = []
                     for block in content:
                         if isinstance(block, str):
-                            combined_content.append({"type": "text", "text": block})
+                            combined_content.append({
+                                "type": "text",
+                                "text": block
+                            })
                         elif isinstance(block, Image.Image):
-                            # For Claude, convert PIL.Image to base64
+                            # Convert PIL.Image to base64
                             buffered = io.BytesIO()
                             block.save(buffered, format="PNG")
                             image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -141,9 +165,12 @@ def completion(model: str, messages: List[Dict[str, Union[str, List[Union[str, I
                                         "data": block["image_base64"]["data"]
                                     }
                                 })
-                    formatted.append({"role": msg["role"], "content": combined_content})
+                    formatted.append({
+                        "role": msg["role"],
+                        "content": combined_content
+                    })
             return system_msg, formatted
-
+
         elif ("gemini" in model or "gpt" in model or "grok" in model) and "openai" not in model:
             formatted = []
             for msg in messages:
@@ -156,40 +183,75 @@ def completion(model: str, messages: List[Dict[str, Union[str, List[Union[str, I
                         if isinstance(block, str):
                             parts.append(block)
                         elif isinstance(block, Image.Image):
+                            # Keep PIL.Image objects as is for Gemini
                             parts.append(block)
                         elif isinstance(block, dict):
                             if block.get("type") == "image_url":
-                                parts.append({"type": "image_url", "image_url": {"url": block["image_url"]["url"]}})
+                                parts.append({
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": block["image_url"]["url"]
+                                    }
+                                })
                             elif block.get("type") == "image_base64":
-                                parts.append({"type": "image_base64", "image_base64": {"data": block["image_base64"]["data"], "media_type": block["image_base64"]["media_type"]}})
-                    formatted.append({"role": msg["role"], "parts": parts})
+                                parts.append({
+                                    "type": "image_base64",
+                                    "image_base64": {
+                                        "data": block["image_base64"]["data"],
+                                        "media_type": block["image_base64"]["media_type"]
+                                    }
+                                })
+                    formatted.append({
+                        "role": msg["role"],
+                        "parts": parts
+                    })
             return None, formatted
-
+
         else: # OpenAI models
             formatted = []
             for msg in messages:
                 content = msg["content"]
                 if isinstance(content, str):
-                    formatted.append({"role": msg["role"], "content": content})
+                    formatted.append({
+                        "role": msg["role"],
+                        "content": content
+                    })
                 elif isinstance(content, list):
-                    # OpenAI expects 'content' as string; images are not directly supported
-                    # You can convert images to URLs or descriptions if needed
-                    combined_content = ""
+                    formatted_content = []
                     for block in content:
                         if isinstance(block, str):
-                            combined_content += block + "\n"
+                            formatted_content.append({
+                                "type": "text",
+                                "text": block
+                            })
                         elif isinstance(block, Image.Image):
-                            # Convert PIL.Image to base64 or upload and use URL
+                            # Convert PIL.Image to base64
                             buffered = io.BytesIO()
                             block.save(buffered, format="PNG")
                             image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
-                            combined_content += f"[Image Base64: {image_base64[:30]}...]\n"
+                            formatted_content.append({
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{image_base64}"
+                                }
+                            })
                         elif isinstance(block, dict):
                             if block.get("type") == "image_url":
-                                combined_content += f"[Image: {block['image_url']['url']}]\n"
+                                formatted_content.append({
+                                    "type": "image_url",
+                                    "image_url": block["image_url"]
+                                })
                             elif block.get("type") == "image_base64":
-                                combined_content += f"[Image Base64: {block['image_base64']['data'][:30]}...]\n"
-                    formatted.append({"role": msg["role"], "content": combined_content.strip()})
+                                formatted_content.append({
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": f"data:image/jpeg;base64,{block['image_base64']['data']}"
+                                    }
+                                })
+                    formatted.append({
+                        "role": msg["role"],
+                        "content": formatted_content
+                    })
             return None, formatted
 
     system_msg, formatted_messages = format_messages_for_api(model, messages.copy())
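Every provider branch now returns the same (system_message, formatted_messages) pair, with a non-None system message only on the Claude path. Since format_messages_for_api is nested inside completion(), the following is purely an illustrative sketch of that contract, with made-up message values:

# Hypothetical input; illustrates the documented return contract only.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# Claude path: the leading system message is popped out and returned separately.
system_msg, formatted = format_messages_for_api("claude-3-5-sonnet", messages.copy())
# system_msg == "You are a helpful assistant."
# formatted == [{"role": "user", "content": "Hello!"}]

# The Gemini/GPT/Grok path and the OpenAI fallback return None instead,
# leaving the system turn inside formatted_messages.
system_msg, formatted = format_messages_for_api("gemini-1.5-flash-openai", messages.copy())
# system_msg is None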
@@ -393,22 +455,42 @@ class Agent:
                 # For Gemini, load as PIL.Image
                 image_pil = Image.open(image_path)
                 image_block = image_pil
-            else:
+            elif "claude" in self.model_name and "openai" not in self.model_name:
                 # For Claude and others, use base64 encoding
                 with open(image_path, "rb") as img_file:
                     image_data = base64.standard_b64encode(img_file.read()).decode("utf-8")
                 image_block = {
-                    "type": "image_base64",
-                    "image_base64": {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
                         "media_type": media_type,
-                        "data": image_data
-                    }
+                        "data": image_data,
+                    },
+                }
+            else:
+                # openai format
+                base64_image = encode_image(image_path)
+                image_block = {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                 }
         else:
             # If image_url is provided
             if "gemini" in self.model_name and "openai" not in self.model_name:
                 # For Gemini, you can pass image URLs directly
                 image_block = {"type": "image_url", "image_url": {"url": image_url}}
+            elif "claude" in self.model_name and "openai" not in self.model_name:
+                import httpx
+                media_type = "image/jpeg"
+                image_data = base64.standard_b64encode(httpx.get(image_url).content).decode("utf-8")
+                image_block = {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": media_type,
+                        "data": image_data,
+                    },
+                }
             else:
                 # For Claude and others, use image URLs
                 image_block = {
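With this change, add_image produces three distinct block shapes for a local file: a raw PIL.Image for Gemini, an Anthropic-style image/source dict for Claude, and an OpenAI-style image_url dict for everything else. A side-by-side sketch of the three results; the path, media type, and "<base64 data>" placeholder are illustrative:

from PIL import Image

# Gemini: the PIL.Image object is passed through unchanged.
gemini_block = Image.open("example.png")

# Claude: Anthropic-style base64 source block.
claude_block = {
    "type": "image",
    "source": {
        "type": "base64",
        "media_type": "image/png",
        "data": "<base64 data>",
    },
}

# OpenAI-compatible: data URL inside an image_url block.
openai_block = {
    "type": "image_url",
    "image_url": {"url": "data:image/jpeg;base64,<base64 data>"},
}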
@@ -522,10 +604,10 @@
 if __name__ == "__main__":
     # Example Usage
     # Create an Agent instance (Gemini model)
-    agent = Agent("gemini-1.5-flash", "you are Jack101", memory_enabled=True)
+    agent = Agent("gemini-1.5-flash-openai", "you are Jack101", memory_enabled=True)
 
     # Add an image
-    agent.add_image(image_path="/Users/junfan/Projects/Personal/oneapi/dialog_manager/example.png")
+    agent.add_image(image_path="example.png")
 
     # Add a user message
     agent.add_message("user", "Who are you? What's in this image?")
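The switch from "gemini-1.5-flash" to "gemini-1.5-flash-openai" matters because every dispatch above tests "openai" not in model, so the suffix routes the example through the OpenAI-compatible branch. A small sketch of that routing rule (the function name and branch labels are descriptive, not from the source):

def pick_branch(model: str) -> str:
    # Mirrors the dispatch conditions in format_messages_for_api and add_image.
    if "claude" in model and "openai" not in model:
        return "anthropic"
    if ("gemini" in model or "gpt" in model or "grok" in model) and "openai" not in model:
        return "native"
    return "openai-compatible"

assert pick_branch("gemini-1.5-flash") == "native"
assert pick_branch("gemini-1.5-flash-openai") == "openai-compatible"
assert pick_branch("claude-3-5-sonnet") == "anthropic"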
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: llm_dialog_manager
-Version: 0.4.3
+Version: 0.4.5
 Summary: A Python package for managing LLM chat conversation history
 Author-email: xihajun <work@2333.fun>
 License: MIT
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llm_dialog_manager"
-version = "0.4.3"
+version = "0.4.5"
 description = "A Python package for managing LLM chat conversation history"
 readme = "README.md"
 classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Scientific/Engineering :: Artificial Intelligence",]