llm-dialog-manager 0.4.3__tar.gz → 0.4.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17)
  1. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/PKG-INFO +1 -1
  2. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/__init__.py +1 -1
  3. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/agent.py +110 -28
  4. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/PKG-INFO +1 -1
  5. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/pyproject.toml +1 -1
  6. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/LICENSE +0 -0
  7. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/README.md +0 -0
  8. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/chat_history.py +0 -0
  9. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/key_manager.py +0 -0
  10. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/SOURCES.txt +0 -0
  11. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/dependency_links.txt +0 -0
  12. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/requires.txt +0 -0
  13. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/top_level.txt +0 -0
  14. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/setup.cfg +0 -0
  15. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/tests/test_agent.py +0 -0
  16. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/tests/test_chat_history.py +0 -0
  17. {llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/tests/test_key_manager.py +0 -0
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: llm_dialog_manager
-Version: 0.4.3
+Version: 0.4.5
 Summary: A Python package for managing LLM chat conversation history
 Author-email: xihajun <work@2333.fun>
 License: MIT
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/__init__.py
@@ -1,4 +1,4 @@
 from .chat_history import ChatHistory
 from .agent import Agent
 
-__version__ = "0.4.3"
+__version__ = "0.4.5"
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager/agent.py
@@ -2,7 +2,7 @@
 import json
 import os
 import uuid
-from typing import List, Dict, Optional, Union
+from typing import List, Dict, Union, Optional, Any
 import logging
 from pathlib import Path
 import random
@@ -38,6 +38,10 @@ def load_env_vars():
 
 load_env_vars()
 
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode("utf-8")
+
 def format_messages_for_gemini(messages):
     """
     Convert the standardized message format into the Gemini format.
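The new encode_image helper returns the raw base64 string for a local file; the OpenAI-style branches further below wrap that string in a data: URL. A minimal sketch of how the two combine, with an illustrative file path and the JPEG media type used elsewhere in this diff:

import base64

def encode_image(image_path):
    # Read the file and return its contents as a base64-encoded string.
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

# Illustrative usage: build the data URL shape the OpenAI-style code expects.
image_url = f"data:image/jpeg;base64,{encode_image('example.png')}"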
@@ -93,13 +97,30 @@ def completion(model: str, messages: List[Dict[str, Union[str, List[Union[str, I
     api_key = os.getenv(f"{service.upper()}_API_KEY")
     base_url = os.getenv(f"{service.upper()}_BASE_URL")
 
-    def format_messages_for_api(model, messages):
-        """Convert ChatHistory messages to the format required by the specific API."""
+    def format_messages_for_api(
+        model: str,
+        messages: List[Dict[str, Union[str, List[Union[str, Image.Image, Dict]]]]]
+    ) -> tuple[Optional[str], List[Dict[str, Any]]]:
+        """
+        Convert ChatHistory messages to the format required by the specific API.
+
+        Args:
+            model: The model name (e.g., "claude", "gemini", "gpt")
+            messages: List of message dictionaries with role and content
+
+        Returns:
+            tuple: (system_message, formatted_messages)
+                - system_message is extracted system message for Claude, None for others
+                - formatted_messages is the list of formatted message dictionaries
+        """
         if "claude" in model and "openai" not in model:
             formatted = []
             system_msg = ""
+
+            # Extract system message if present
             if messages and messages[0]["role"] == "system":
                 system_msg = messages.pop(0)["content"]
+
             for msg in messages:
                 content = msg["content"]
                 if isinstance(content, str):
@@ -109,9 +130,12 @@ def completion(model: str, messages: List[Dict[str, Union[str, List[Union[str, I
                     combined_content = []
                     for block in content:
                         if isinstance(block, str):
-                            combined_content.append({"type": "text", "text": block})
+                            combined_content.append({
+                                "type": "text",
+                                "text": block
+                            })
                         elif isinstance(block, Image.Image):
-                            # For Claude, convert PIL.Image to base64
+                            # Convert PIL.Image to base64
                             buffered = io.BytesIO()
                             block.save(buffered, format="PNG")
                             image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
@@ -141,9 +165,12 @@ def completion(model: str, messages: List[Dict[str, Union[str, List[Union[str, I
                                         "data": block["image_base64"]["data"]
                                     }
                                 })
-                    formatted.append({"role": msg["role"], "content": combined_content})
+                    formatted.append({
+                        "role": msg["role"],
+                        "content": combined_content
+                    })
             return system_msg, formatted
-
+
         elif ("gemini" in model or "gpt" in model or "grok" in model) and "openai" not in model:
             formatted = []
             for msg in messages:
@@ -156,40 +183,75 @@ def completion(model: str, messages: List[Dict[str, Union[str, List[Union[str, I
                         if isinstance(block, str):
                             parts.append(block)
                         elif isinstance(block, Image.Image):
+                            # Keep PIL.Image objects as is for Gemini
                             parts.append(block)
                         elif isinstance(block, dict):
                             if block.get("type") == "image_url":
-                                parts.append({"type": "image_url", "image_url": {"url": block["image_url"]["url"]}})
+                                parts.append({
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": block["image_url"]["url"]
+                                    }
+                                })
                             elif block.get("type") == "image_base64":
-                                parts.append({"type": "image_base64", "image_base64": {"data": block["image_base64"]["data"], "media_type": block["image_base64"]["media_type"]}})
-                    formatted.append({"role": msg["role"], "parts": parts})
+                                parts.append({
+                                    "type": "image_base64",
+                                    "image_base64": {
+                                        "data": block["image_base64"]["data"],
+                                        "media_type": block["image_base64"]["media_type"]
+                                    }
+                                })
+                    formatted.append({
+                        "role": msg["role"],
+                        "parts": parts
+                    })
             return None, formatted
-
+
         else: # OpenAI models
             formatted = []
             for msg in messages:
                 content = msg["content"]
                 if isinstance(content, str):
-                    formatted.append({"role": msg["role"], "content": content})
+                    formatted.append({
+                        "role": msg["role"],
+                        "content": content
+                    })
                 elif isinstance(content, list):
-                    # OpenAI expects 'content' as string; images are not directly supported
-                    # You can convert images to URLs or descriptions if needed
-                    combined_content = ""
+                    formatted_content = []
                     for block in content:
                         if isinstance(block, str):
-                            combined_content += block + "\n"
+                            formatted_content.append({
+                                "type": "text",
+                                "text": block
+                            })
                         elif isinstance(block, Image.Image):
-                            # Convert PIL.Image to base64 or upload and use URL
+                            # Convert PIL.Image to base64
                             buffered = io.BytesIO()
                             block.save(buffered, format="PNG")
                             image_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
-                            combined_content += f"[Image Base64: {image_base64[:30]}...]\n"
+                            formatted_content.append({
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{image_base64}"
+                                }
+                            })
                         elif isinstance(block, dict):
                             if block.get("type") == "image_url":
-                                combined_content += f"[Image: {block['image_url']['url']}]\n"
+                                formatted_content.append({
+                                    "type": "image_url",
+                                    "image_url": block["image_url"]
+                                })
                             elif block.get("type") == "image_base64":
-                                combined_content += f"[Image Base64: {block['image_base64']['data'][:30]}...]\n"
-                    formatted.append({"role": msg["role"], "content": combined_content.strip()})
+                                formatted_content.append({
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": f"data:image/jpeg;base64,{block['image_base64']['data']}"
+                                    }
+                                })
+                    formatted.append({
+                        "role": msg["role"],
+                        "content": formatted_content
+                    })
             return None, formatted
 
     system_msg, formatted_messages = format_messages_for_api(model, messages.copy())
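Every provider branch now returns the same (system_message, formatted_messages) pair, with a non-None system message only on the Claude path. Since format_messages_for_api is nested inside completion(), the following is purely an illustrative sketch of that contract, with made-up message values:

# Hypothetical input; illustrates the documented return contract only.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# Claude path: the leading system message is popped out and returned separately.
system_msg, formatted = format_messages_for_api("claude-3-5-sonnet", messages.copy())
# system_msg == "You are a helpful assistant."
# formatted == [{"role": "user", "content": "Hello!"}]

# The Gemini/GPT/Grok path and the OpenAI fallback return None instead,
# leaving the system turn inside formatted_messages.
system_msg, formatted = format_messages_for_api("gemini-1.5-flash-openai", messages.copy())
# system_msg is None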
@@ -393,22 +455,42 @@ class Agent:
                 # For Gemini, load as PIL.Image
                 image_pil = Image.open(image_path)
                 image_block = image_pil
-            else:
+            elif "claude" in self.model_name and "openai" not in self.model_name:
                 # For Claude and others, use base64 encoding
                 with open(image_path, "rb") as img_file:
                     image_data = base64.standard_b64encode(img_file.read()).decode("utf-8")
                 image_block = {
-                    "type": "image_base64",
-                    "image_base64": {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
                         "media_type": media_type,
-                        "data": image_data
-                    }
+                        "data": image_data,
+                    },
+                }
+            else:
+                # openai format
+                base64_image = encode_image(image_path)
+                image_block = {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                 }
         else:
             # If image_url is provided
             if "gemini" in self.model_name and "openai" not in self.model_name:
                 # For Gemini, you can pass image URLs directly
                 image_block = {"type": "image_url", "image_url": {"url": image_url}}
+            elif "claude" in self.model_name and "openai" not in self.model_name:
+                import httpx
+                media_type = "image/jpeg"
+                image_data = base64.standard_b64encode(httpx.get(image_url).content).decode("utf-8")
+                image_block = {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": media_type,
+                        "data": image_data,
+                    },
+                }
             else:
                 # For Claude and others, use image URLs
                 image_block = {
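With this change, add_image produces three distinct block shapes for a local file: a raw PIL.Image for Gemini, an Anthropic-style image/source dict for Claude, and an OpenAI-style image_url dict for everything else. A side-by-side sketch of the three results; the path, media type, and "<base64 data>" placeholder are illustrative:

from PIL import Image

# Gemini: the PIL.Image object is passed through unchanged.
gemini_block = Image.open("example.png")

# Claude: Anthropic-style base64 source block.
claude_block = {
    "type": "image",
    "source": {
        "type": "base64",
        "media_type": "image/png",
        "data": "<base64 data>",
    },
}

# OpenAI-compatible: data URL inside an image_url block.
openai_block = {
    "type": "image_url",
    "image_url": {"url": "data:image/jpeg;base64,<base64 data>"},
}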
@@ -522,10 +604,10 @@
 if __name__ == "__main__":
     # Example Usage
     # Create an Agent instance (Gemini model)
-    agent = Agent("gemini-1.5-flash", "you are Jack101", memory_enabled=True)
+    agent = Agent("gemini-1.5-flash-openai", "you are Jack101", memory_enabled=True)
 
     # Add an image
-    agent.add_image(image_path="/Users/junfan/Projects/Personal/oneapi/dialog_manager/example.png")
+    agent.add_image(image_path="example.png")
 
     # Add a user message
     agent.add_message("user", "Who are you? What's in this image?")
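The switch from "gemini-1.5-flash" to "gemini-1.5-flash-openai" matters because every dispatch above tests "openai" not in model, so the suffix routes the example through the OpenAI-compatible branch. A small sketch of that routing rule (the function name and branch labels are descriptive, not from the source):

def pick_branch(model: str) -> str:
    # Mirrors the dispatch conditions in format_messages_for_api and add_image.
    if "claude" in model and "openai" not in model:
        return "anthropic"
    if ("gemini" in model or "gpt" in model or "grok" in model) and "openai" not in model:
        return "native"
    return "openai-compatible"

assert pick_branch("gemini-1.5-flash") == "native"
assert pick_branch("gemini-1.5-flash-openai") == "openai-compatible"
assert pick_branch("claude-3-5-sonnet") == "anthropic"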
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/llm_dialog_manager.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: llm_dialog_manager
-Version: 0.4.3
+Version: 0.4.5
 Summary: A Python package for managing LLM chat conversation history
 Author-email: xihajun <work@2333.fun>
 License: MIT
{llm_dialog_manager-0.4.3 → llm_dialog_manager-0.4.5}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "llm_dialog_manager"
-version = "0.4.3"
+version = "0.4.5"
 description = "A Python package for managing LLM chat conversation history"
 readme = "README.md"
 classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Scientific/Engineering :: Artificial Intelligence",]