dm-aioaiagent 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dm_aioaiagent/__init__.py CHANGED
@@ -1,3 +1,4 @@
1
1
  from .ai_agent import DMAIAgent
2
2
  from .async_ai_agent import DMAioAIAgent
3
- from .openai_image_message_content import OpenAIImageMessageContent
3
+ from .input_image import InputImage
4
+ from .output_image import OutputImage
dm_aioaiagent/ai_agent.py CHANGED
@@ -1,22 +1,33 @@
1
+ import copy
1
2
  import os
3
+ import re
2
4
  import uuid
3
- from typing import Any
4
- from pydantic import SecretStr
5
+ from typing import Any, Literal, Optional, Type
6
+ from pydantic import BaseModel, Field, SecretStr
5
7
  from itertools import dropwhile
6
8
  from threading import Thread
7
9
  from langchain.chat_models import init_chat_model
8
- from langchain_core.tools import BaseTool
10
+ from langchain_core.tools import BaseTool, StructuredTool
9
11
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
12
  from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
11
13
  from langgraph.graph import StateGraph
12
14
  from dm_logger import DMLogger
13
15
 
16
+ from .output_image import OutputImage
14
17
  from .types import *
15
18
 
16
19
 
17
20
  class DMAIAgent:
18
21
  MAX_MEMORY_MESSAGES = 20 # Only INT greater than 0
22
+ ImageMemoryMode = Literal["drop", "keep_last", "keep_all"]
19
23
  _ALLOWED_ROLES = ("user", "ai")
24
+ _VALID_IMAGE_MEMORY_MODES = ("drop", "keep_last", "keep_all")
25
+ _INVALID_IMAGE_ERROR_MARKERS = (
26
+ "invalid_image_url",
27
+ "Could not process image",
28
+ "Unable to process input image",
29
+ "INVALID_ARGUMENT",
30
+ )
20
31
 
21
32
  def __init__(
22
33
  self,
@@ -35,6 +46,9 @@ class DMAIAgent:
35
46
  is_memory_enabled: bool = True,
36
47
  save_tools_responses_in_memory: bool = True,
37
48
  max_memory_messages: int = MAX_MEMORY_MESSAGES,
49
+ # multimodal
50
+ enable_image_generation: bool = False,
51
+ image_memory_mode: ImageMemoryMode = "keep_last",
38
52
  # other
39
53
  input_output_logging: bool = True,
40
54
  node_execution_logging: bool = True,
@@ -44,7 +58,8 @@ class DMAIAgent:
44
58
  llm_provider_api_key: str = "",
45
59
  llm_provider_base_url: str = ""
46
60
  ):
47
- self._logger = DMLogger(agent_name)
61
+ self._agent_name = str(agent_name)
62
+ self._logger = DMLogger(self._agent_name)
48
63
 
49
64
  # general
50
65
  self._system_message = str(system_message)
@@ -66,6 +81,10 @@ class DMAIAgent:
66
81
  self._is_memory_enabled = bool(is_memory_enabled)
67
82
  self._save_tools_responses_in_memory = bool(save_tools_responses_in_memory)
68
83
  self._max_memory_messages = self._validate_max_memory_messages(max_memory_messages)
84
+ # multimodal
85
+ self._enable_image_generation = bool(enable_image_generation)
86
+ self._image_memory_mode = self._validate_image_memory_mode(image_memory_mode)
87
+ self._images: list[OutputImage] = []
69
88
  # other
70
89
  self._input_output_logging = bool(input_output_logging)
71
90
  self._node_execution_logging = bool(node_execution_logging)
@@ -84,9 +103,18 @@ class DMAIAgent:
84
103
 
85
104
  last_message = new_messages[-1]
86
105
  if isinstance(last_message, AIMessage):
87
- return last_message.content
106
+ return self._extract_text(last_message)
88
107
  return last_message
89
108
 
109
+ @staticmethod
110
+ def _extract_text(message: AIMessage) -> str:
111
+ # AIMessage.content may be a plain string (legacy) or a list of standard
112
+ # v1 content blocks (multimodal). For non-string content collect every
113
+ # text block; if there are none, return empty string.
114
+ if isinstance(message.content, str):
115
+ return message.content
116
+ return "".join(b["text"] for b in message.content_blocks if b.get("type") == "text")
117
+
90
118
  def run_messages(
91
119
  self,
92
120
  messages: list[InputMessage],
@@ -98,8 +126,6 @@ class DMAIAgent:
98
126
  ) -> list[BaseMessage]:
99
127
  if ls_metadata is None:
100
128
  ls_metadata = {}
101
- if isinstance(ls_run_id, uuid.UUID):
102
- ls_run_id = ls_run_id
103
129
  if isinstance(ls_thread_id, uuid.UUID):
104
130
  ls_metadata["thread_id"] = ls_thread_id
105
131
 
@@ -118,8 +144,48 @@ class DMAIAgent:
118
144
  def memory_messages(self) -> list[BaseMessage]:
119
145
  return self._memory_messages
120
146
 
147
+ @property
148
+ def images(self) -> tuple[OutputImage, ...]:
149
+ return tuple(self._images)
150
+
121
151
  def clear_memory_messages(self) -> None:
122
152
  self._memory_messages.clear()
153
+ self._images.clear()
154
+
155
def as_tool(
    self,
    *,
    description: str,
    name: Optional[str] = None,
    args_schema: Optional[Type[BaseModel]] = None,
) -> BaseTool:
    """Expose this agent as a ``StructuredTool`` for another agent.

    Args:
        description: Required, non-empty text telling the parent model
            when to call the tool.
        name: Tool name; when falsy it is derived from ``agent_name`` via
            ``_sanitize_tool_name``.
        args_schema: Argument schema; when ``None`` the default schema
            with a single ``query: str`` field is used.

    Returns:
        A tool whose call forwards ``query`` to ``self.run``.

    Raises:
        ValueError: If ``description`` is empty.
    """
    if not description:
        raise ValueError("`description` is required for as_tool().")

    def _invoke_agent(query, **kw):
        # Only `query` reaches the wrapped agent; extra schema fields are ignored.
        return self.run(query)

    return StructuredTool.from_function(
        func=_invoke_agent,
        name=name or self._sanitize_tool_name(self._agent_name),
        description=description,
        args_schema=self._default_tool_args_schema() if args_schema is None else args_schema,
    )
178
+
179
+ @staticmethod
180
+ def _sanitize_tool_name(raw: str) -> str:
181
+ sanitized = re.sub(r"[^a-z0-9]+", "_", raw.lower()).strip("_")
182
+ return sanitized or "agent"
183
+
184
+ @staticmethod
185
+ def _default_tool_args_schema() -> Type[BaseModel]:
186
+ class _AgentToolInput(BaseModel):
187
+ query: str = Field(..., description="User query for the wrapped agent.")
188
+ return _AgentToolInput
123
189
 
124
190
  def _prepare_messages_node(self, state: State) -> State:
125
191
  messages = state["messages"] or [{"role": "user", "content": ""}]
@@ -129,16 +195,17 @@ class DMAIAgent:
129
195
  role = item.get("role")
130
196
  content = item.get("content")
131
197
  if not role or role not in self._ALLOWED_ROLES or not content:
198
+ self._logger.debug("Skipped malformed input dict", role=role, content_type=type(content).__name__)
132
199
  continue
133
- if role == "ai":
134
- MessageClass = AIMessage
135
- else:
136
- MessageClass = HumanMessage
137
- state["messages"].append(MessageClass(content))
200
+ # content may be a plain str (legacy) or a list of v1 standard
201
+ # content blocks (multimodal: text + image/audio/video/file).
202
+ # Both shapes are accepted by HumanMessage/AIMessage as-is.
203
+ MessageClass = AIMessage if role == "ai" else HumanMessage
204
+ state["messages"].append(MessageClass(content=content))
138
205
  elif isinstance(item, BaseMessage):
139
206
  state["messages"].append(item)
140
207
 
141
- if self._input_output_logging:
208
+ if self._input_output_logging and state["messages"]:
142
209
  self._logger.debug(f'Query:\n{state["messages"][-1].content}')
143
210
  if self._is_memory_enabled:
144
211
  state["messages"] = self._memory_messages + state["messages"]
@@ -152,7 +219,8 @@ class DMAIAgent:
152
219
  except Exception as e:
153
220
  self._logger.error(e)
154
221
  if second_attempt:
155
- if "invalid_image_url" in str(e):
222
+ err_str = str(e)
223
+ if any(m in err_str for m in self._INVALID_IMAGE_ERROR_MARKERS):
156
224
  response = self._response_if_invalid_image
157
225
  else:
158
226
  response = self._response_if_request_fail
@@ -221,8 +289,19 @@ class DMAIAgent:
221
289
  answer = answer.content
222
290
  self._logger.debug(f'Answer:\n{answer}')
223
291
 
292
+ last = state["messages"][-1] if state["messages"] else None
293
+ new_imgs = OutputImage.extract_from(last) if isinstance(last, AIMessage) else []
294
+ if self._image_memory_mode == "drop":
295
+ self._images = list(new_imgs)
296
+ elif self._image_memory_mode == "keep_last":
297
+ if new_imgs:
298
+ self._images = list(new_imgs)
299
+ else: # keep_all
300
+ self._images.extend(new_imgs)
301
+
224
302
  if self._is_memory_enabled:
225
303
  messages_to_memory = state["messages"][-self._max_memory_messages:]
304
+ messages_to_memory = self._apply_image_memory_mode(messages_to_memory)
226
305
  if self._save_tools_responses_in_memory:
227
306
  # drop ToolsMessages from start of list
228
307
  self._memory_messages = list(dropwhile(lambda x: isinstance(x, ToolMessage), messages_to_memory))
@@ -234,6 +313,43 @@ class DMAIAgent:
234
313
  self._memory_messages.append(mes)
235
314
  return state
236
315
 
316
+ def _apply_image_memory_mode(self, messages: list) -> list:
317
+ # Returns a list of messages with image blocks stripped according to
318
+ # self._image_memory_mode. ToolMessages are never touched. Original
319
+ # message objects are deep-copied before mutation, so callers holding
320
+ # references (notably state["new_messages"]) see untouched content.
321
+ if self._image_memory_mode == "keep_all":
322
+ return list(messages)
323
+
324
+ last_user_image_idx = -1
325
+ last_ai_image_idx = -1
326
+ for i, m in enumerate(messages):
327
+ if isinstance(m, ToolMessage) or not isinstance(m.content, list):
328
+ continue
329
+ if not any(isinstance(b, dict) and b.get("type") == "image" for b in m.content):
330
+ continue
331
+ if isinstance(m, HumanMessage):
332
+ last_user_image_idx = i
333
+ elif isinstance(m, AIMessage):
334
+ last_ai_image_idx = i
335
+
336
+ result = []
337
+ for i, m in enumerate(messages):
338
+ if isinstance(m, ToolMessage) or not isinstance(m.content, list):
339
+ result.append(m)
340
+ continue
341
+ if self._image_memory_mode == "keep_last" and i in (last_user_image_idx, last_ai_image_idx):
342
+ result.append(m)
343
+ continue
344
+ if not any(isinstance(b, dict) and b.get("type") == "image" for b in m.content):
345
+ result.append(m)
346
+ continue
347
+ placeholder = {"type": "text", "text": "[generated image]" if isinstance(m, AIMessage) else "[image]"}
348
+ m_copy = copy.deepcopy(m)
349
+ m_copy.content = [placeholder if (isinstance(b, dict) and b.get("type") == "image") else b for b in m_copy.content]
350
+ result.append(m_copy)
351
+ return result
352
+
237
353
  def _messages_router(self, state: State) -> str:
238
354
  if self._output_schema:
239
355
  return "exit"
@@ -242,7 +358,7 @@ class DMAIAgent:
242
358
  return "exit"
243
359
 
244
360
  def _init_agent(self) -> None:
245
- base_kwargs = {"model": self._model}
361
+ base_kwargs = {"model": self._model, "output_version": "v1"}
246
362
  if self._temperature is not None:
247
363
  if not isinstance(self._temperature, (int, float)):
248
364
  raise ValueError("Temperature must be a float value.")
@@ -252,9 +368,35 @@ class DMAIAgent:
252
368
  if self._llm_provider_base_url:
253
369
  base_kwargs["base_url"] = self._llm_provider_base_url
254
370
 
371
+ # Pre-init: OpenAI image generation needs the Responses API.
372
+ # Detect the requested provider from the model string (init_chat_model
373
+ # itself does the same) so we can flip use_responses_api before init.
374
+ if self._enable_image_generation and self._wants_openai_provider(self._model):
375
+ base_kwargs["use_responses_api"] = True
376
+
377
+ # Pre-init: Gemini image-output models also need the IMAGE modality —
378
+ # but only if the user opted into image generation via the master flag.
379
+ is_gemini_image = self._is_gemini_image_model(self._model)
380
+ if self._enable_image_generation and is_gemini_image:
381
+ base_kwargs["response_modalities"] = ["IMAGE", "TEXT"]
382
+
255
383
  llm = init_chat_model(**base_kwargs)
256
384
 
257
- provider = self._detect_provider(self._model)
385
+ provider = self._get_provider(llm)
386
+
387
+ # Cross-provider warnings for image-generation flag mismatches.
388
+ if self._enable_image_generation and self._output_schema:
389
+ self._logger.warning(
390
+ "output_schema disables tools — enable_image_generation will be ignored."
391
+ )
392
+ if self._enable_image_generation and provider == "anthropic":
393
+ self._logger.warning("Claude does not support image generation; the flag is ignored.")
394
+ if not self._enable_image_generation and is_gemini_image:
395
+ self._logger.warning(
396
+ f"Model {self._model!r} is image-capable but enable_image_generation=False "
397
+ f"— set the flag to True to let it draw."
398
+ )
399
+
258
400
  if provider == "anthropic":
259
401
  bind_tool_kwargs = {"tool_choice": {"type": "auto"}}
260
402
  if isinstance(self._parallel_tool_calls, bool):
@@ -268,7 +410,13 @@ class DMAIAgent:
268
410
 
269
411
  if self._is_tools_exists:
270
412
  self._tool_map = {t.name: t for t in self._tools}
271
- llm = llm.bind_tools(self._tools, **bind_tool_kwargs)
413
+
414
+ tools_to_bind: list = list(self._tools)
415
+ if self._enable_image_generation and provider == "openai" and not self._output_schema:
416
+ tools_to_bind.append({"type": "image_generation"})
417
+
418
+ if tools_to_bind:
419
+ llm = llm.bind_tools(tools_to_bind, **bind_tool_kwargs)
272
420
 
273
421
  if self._output_schema:
274
422
  llm = llm.with_structured_output(self._output_schema)
@@ -278,14 +426,38 @@ class DMAIAgent:
278
426
  self._agent = prompt | llm
279
427
 
280
428
  @staticmethod
281
- def _detect_provider(model: str) -> str:
429
+ def _get_provider(llm) -> str:
430
+ # Derive provider tag from the chat-model class name: strip "Chat" prefix
431
+ # and lowercase. Vertex AI is aliased to Google Generative AI — they are
432
+ # the same provider for our branching purposes.
433
+ name = type(llm).__name__
434
+ if name.startswith("Chat"):
435
+ name = name[4:]
436
+ provider = name.lower()
437
+ if provider == "vertexai":
438
+ provider = "googlegenerativeai"
439
+ return provider
440
+
441
+ @staticmethod
442
+ def _wants_openai_provider(model: str) -> bool:
443
+ # Used pre-init for kwargs that must be set on the constructor
444
+ # (use_responses_api). OpenAI prefixes are stable; other providers go
445
+ # through post-init _get_provider().
282
446
  if ":" in model:
283
- return model.split(":", 1)[0].replace("-", "_").lower()
284
- if model.startswith(("gpt-", "o1", "o3")):
285
- return "openai"
286
- if model.startswith("claude"):
287
- return "anthropic"
288
- return ""
447
+ return model.split(":", 1)[0].lower() == "openai"
448
+ m = model.lower()
449
+ return m.startswith(("gpt-", "o1", "o3", "o4", "chatgpt"))
450
+
451
+ @staticmethod
452
+ def _is_gemini_image_model(model: str) -> bool:
453
+ # Model names like "gemini-2.5-flash-image" or
454
+ # "gemini-2.0-flash-preview-image-generation" — must opt into the
455
+ # IMAGE modality at init time, otherwise the model still answers in
456
+ # text only.
457
+ m = model.lower()
458
+ if ":" in m:
459
+ m = m.split(":", 1)[1]
460
+ return m.startswith("gemini") and "image" in m
289
461
 
290
462
  def _init_graph(self) -> None:
291
463
  workflow = StateGraph(State)
@@ -317,6 +489,14 @@ class DMAIAgent:
317
489
  return max_messages_in_memory
318
490
  return cls.MAX_MEMORY_MESSAGES
319
491
 
492
+ @classmethod
493
+ def _validate_image_memory_mode(cls, mode: str) -> str:
494
+ if mode in cls._VALID_IMAGE_MEMORY_MODES:
495
+ return mode
496
+ raise ValueError(
497
+ f"image_memory_mode must be one of {cls._VALID_IMAGE_MEMORY_MODES}, got {mode!r}."
498
+ )
499
+
320
500
  @staticmethod
321
501
  def _validate_output_schema(schema: OutputSchemaType) -> OutputSchemaType:
322
502
  if schema is None:
@@ -1,7 +1,9 @@
1
1
  import uuid
2
2
  import asyncio
3
- from typing import Any
3
+ from typing import Any, Optional, Type
4
+ from pydantic import BaseModel
4
5
  from langchain_core.messages import AIMessage, ToolMessage
6
+ from langchain_core.tools import BaseTool, StructuredTool
5
7
 
6
8
  from .ai_agent import DMAIAgent
7
9
  from .types import *
@@ -27,7 +29,7 @@ class DMAioAIAgent(DMAIAgent):
27
29
 
28
30
  last_message = new_messages[-1]
29
31
  if isinstance(last_message, AIMessage):
30
- return last_message.content
32
+ return self._extract_text(last_message)
31
33
  return last_message
32
34
 
33
35
  async def run_messages(
@@ -41,8 +43,6 @@ class DMAioAIAgent(DMAIAgent):
41
43
  ) -> list[BaseMessage]:
42
44
  if ls_metadata is None:
43
45
  ls_metadata = {}
44
- if isinstance(ls_run_id, uuid.UUID):
45
- ls_run_id = ls_run_id
46
46
  if isinstance(ls_thread_id, uuid.UUID):
47
47
  ls_metadata["thread_id"] = ls_thread_id
48
48
 
@@ -65,7 +65,8 @@ class DMAioAIAgent(DMAIAgent):
65
65
  except Exception as e:
66
66
  self._logger.error(e)
67
67
  if second_attempt:
68
- if "invalid_image_url" in str(e):
68
+ err_str = str(e)
69
+ if any(m in err_str for m in self._INVALID_IMAGE_ERROR_MARKERS):
69
70
  response = self._response_if_invalid_image
70
71
  else:
71
72
  response = self._response_if_request_fail
@@ -123,3 +124,29 @@ class DMAioAIAgent(DMAIAgent):
123
124
 
124
125
  await asyncio.gather(*tasks)
125
126
  return state
127
+
128
def as_tool(
    self,
    *,
    description: str,
    name: Optional[str] = None,
    args_schema: Optional[Type[BaseModel]] = None,
) -> BaseTool:
    """Expose this async agent as a ``StructuredTool`` for another agent.

    The tool carries both a coroutine (awaits ``self.run``) and a
    synchronous fallback, so it can be called from async and sync parents.

    Args:
        description: Required, non-empty text telling the parent model
            when to call the tool.
        name: Tool name; when falsy it is derived from ``agent_name`` via
            ``_sanitize_tool_name``.
        args_schema: Argument schema; when ``None`` the default schema
            with a single ``query: str`` field is used.

    Raises:
        ValueError: If ``description`` is empty.
    """
    if not description:
        raise ValueError("`description` is required for as_tool().")
    tool_name = name if name else self._sanitize_tool_name(self._agent_name)
    schema = args_schema if args_schema is not None else self._default_tool_args_schema()

    async def _arun(query: str, **kw):
        return await self.run(query)

    def _run_sync(query: str, **kw):
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No event loop in this thread: asyncio.run() is safe to use.
            return asyncio.run(self.run(query))
        # A loop is already running in this thread, where asyncio.run()
        # would raise RuntimeError. Run the coroutine on a fresh loop in a
        # worker thread and block until it finishes.
        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(lambda: asyncio.run(self.run(query))).result()

    return StructuredTool.from_function(
        name=tool_name,
        description=description,
        args_schema=schema,
        func=_run_sync,
        coroutine=_arun,
    )
@@ -0,0 +1,73 @@
1
+ import base64
2
+ import mimetypes
3
+ from pathlib import Path
4
+ from typing import Optional, Union
5
+
6
+ from langchain_core.messages import HumanMessage
7
+
8
+
9
class InputImage:
    """Cross-provider image input helper.

    Builds a ``HumanMessage`` whose ``.content`` is a list of LangChain v1
    standard content blocks — one optional ``{"type":"text"}`` followed by
    one ``{"type":"image"}``. The same message is accepted by OpenAI,
    Anthropic, and Gemini chat models when used through ``init_chat_model``.
    """

    @classmethod
    def from_file(
        cls,
        path: Union[str, Path],
        *,
        text: Optional[str] = None,
        mime_type: Optional[str] = None,
    ) -> HumanMessage:
        """Build an image message from a local file.

        The file is read as bytes and embedded as base64. When ``mime_type``
        is not given it is guessed from the file name.

        Raises:
            ValueError: If the MIME type cannot be inferred.
        """
        path = Path(path)
        data = path.read_bytes()
        if mime_type is None:
            mime_type, _ = mimetypes.guess_type(str(path))
        if mime_type is None:
            raise ValueError(
                f"Could not infer mime type for {path!s}. Pass mime_type explicitly."
            )
        return cls.from_bytes(data, mime_type=mime_type, text=text)

    @classmethod
    def from_url(cls, url: str, *, text: Optional[str] = None) -> HumanMessage:
        """Build an image message that references a remote image by URL."""
        block = {"type": "image", "url": str(url)}
        return cls._build_message(block, text)

    @classmethod
    def from_base64(
        cls,
        data: str,
        *,
        mime_type: str,
        text: Optional[str] = None,
    ) -> HumanMessage:
        """Build an image message from an already base64-encoded payload.

        ``data`` is normally a ``str``; base64 given as ``bytes`` or
        ``bytearray`` is decoded as ASCII first.

        Raises:
            ValueError: If ``mime_type`` is empty, or if bytes input is not
                valid ASCII (``UnicodeDecodeError`` is a ``ValueError``).
        """
        if not mime_type:
            raise ValueError("mime_type is required for from_base64().")
        if isinstance(data, (bytes, bytearray)):
            # str(b"...") would yield the literal "b'...'" and corrupt the
            # payload, so decode bytes-like input explicitly.
            data = bytes(data).decode("ascii")
        block = {"type": "image", "base64": str(data), "mime_type": str(mime_type)}
        return cls._build_message(block, text)

    @classmethod
    def from_bytes(
        cls,
        data: bytes,
        *,
        mime_type: str,
        text: Optional[str] = None,
    ) -> HumanMessage:
        """Build an image message from raw image bytes.

        Raises:
            TypeError: If ``data`` is neither ``bytes`` nor ``bytearray``.
            ValueError: If ``mime_type`` is empty.
        """
        if not isinstance(data, (bytes, bytearray)):
            raise TypeError("from_bytes() expects bytes-like object.")
        b64 = base64.b64encode(bytes(data)).decode("ascii")
        return cls.from_base64(b64, mime_type=mime_type, text=text)

    @staticmethod
    def _build_message(image_block: dict, text: Optional[str]) -> HumanMessage:
        """Assemble the message: optional text block first, then the image block."""
        content: list[dict] = []
        if text:
            content.append({"type": "text", "text": str(text)})
        content.append(image_block)
        return HumanMessage(content=content)
@@ -0,0 +1,60 @@
1
+ import base64
2
+ import urllib.request
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import List, Optional, Union
6
+
7
+ from langchain_core.messages import BaseMessage
8
+
9
+
10
@dataclass
class OutputImage:
    """Provider-agnostic representation of an AI-generated image.

    Holds the raw image payload (``bytes``) and its MIME type (``mime_type``).
    """

    bytes: bytes
    mime_type: str

    # Upper bound, in seconds, for fetching one URL image block so that a
    # stalled server cannot hang extraction. Left unannotated on purpose:
    # an annotation would turn it into a dataclass field.
    _URL_TIMEOUT_SECONDS = 30

    # URL prefixes extract_from() will open. URLs come from model output, so
    # other schemes (notably file://) are skipped rather than fetched.
    _ALLOWED_URL_PREFIXES = ("http://", "https://", "data:")

    def save(self, path: Union[str, Path]) -> Path:
        """Write the image bytes to ``path`` and return it as a ``Path``."""
        path = Path(path)
        path.write_bytes(self.bytes)
        return path

    def to_base64(self) -> str:
        """Return the image bytes as an ASCII base64 string."""
        return base64.b64encode(self.bytes).decode("ascii")

    def __repr__(self) -> str:
        return f"OutputImage(mime_type={self.mime_type!r}, size={len(self.bytes)} bytes)"

    @classmethod
    def extract_from(cls, message: "BaseMessage") -> List["OutputImage"]:
        """Extract every image block from ``message.content_blocks``.

        Inline base64 blocks decode directly. URL blocks are fetched via
        ``urllib`` (http, https and data URLs only, with a timeout) and the
        response's ``Content-Type`` is preferred over the block's declared
        ``mime_type``; ``image/png`` is assumed when neither is present.
        Blocks that fail to decode or download are silently skipped —
        partial extraction beats raising on one bad block.
        """
        result: List["OutputImage"] = []
        for block in message.content_blocks:
            if not isinstance(block, dict) or block.get("type") != "image":
                continue
            mime_type = block.get("mime_type") or "image/png"
            data: Optional[bytes] = None
            if block.get("base64"):
                try:
                    data = base64.b64decode(block["base64"])
                except (ValueError, TypeError):
                    # binascii.Error is a ValueError; TypeError covers
                    # payloads that are not str/bytes-like.
                    continue
            elif block.get("url"):
                url = block["url"]
                if not isinstance(url, str) or not url.lower().startswith(cls._ALLOWED_URL_PREFIXES):
                    continue
                try:
                    with urllib.request.urlopen(url, timeout=cls._URL_TIMEOUT_SECONDS) as resp:
                        data = resp.read()
                        ct = resp.headers.get("Content-Type")
                        if ct:
                            mime_type = ct.split(";", 1)[0].strip()
                except Exception:
                    # Deliberate best-effort: any fetch failure skips the block.
                    continue
            if data is None:
                continue
            result.append(cls(bytes=data, mime_type=mime_type))
        return result
dm_aioaiagent/types.py CHANGED
@@ -9,27 +9,13 @@ AfterToolCallCallback = Callable[[str, dict, str], None]
9
9
  AsyncBeforeToolCallCallback = Callable[[str, dict], Coroutine[Any, Any, None]]
10
10
  AsyncAfterToolCallCallback = Callable[[str, dict, str], Coroutine[Any, Any, None]]
11
11
 
12
- class ImageMessageTextItem(TypedDict):
13
- type: Literal['text']
14
- text: str
15
-
16
-
17
- class ImageMessageImageItem(TypedDict):
18
- type: Literal['image_url']
19
- image_url: dict
20
-
21
-
22
- class ImageMessage(TypedDict):
23
- role: Literal["user"]
24
- content: list[Union[ImageMessageTextItem, ImageMessageImageItem]]
25
-
26
12
 
27
13
  class TextMessage(TypedDict):
28
14
  role: Literal["user", "ai"]
29
15
  content: str
30
16
 
31
17
 
32
- InputMessage = Union[TextMessage, ImageMessage, BaseMessage]
18
+ InputMessage = Union[TextMessage, BaseMessage]
33
19
 
34
20
 
35
21
  class State(TypedDict):
@@ -0,0 +1,387 @@
1
+ Metadata-Version: 2.4
2
+ Name: dm-aioaiagent
3
+ Version: 0.7.0
4
+ Summary: This is my custom aioaiagent client
5
+ Home-page: https://pypi.org/project/dm-aioaiagent
6
+ Author: dimka4621
7
+ Author-email: mismartconfig@gmail.com
8
+ Project-URL: GitHub, https://github.com/MykhLibs/dm-aioaiagent
9
+ Keywords: dm aioaiagent
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Requires-Python: >=3.9
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: dm-logger<0.7.0,>=0.6.6
16
+ Requires-Dist: python-dotenv>=1.0.0
17
+ Requires-Dist: pydantic<3.0.0,>=2.9.2
18
+ Requires-Dist: langchain<2.0.0,>=1.0.0
19
+ Requires-Dist: langchain-core<2.0.0,>=1.0.0
20
+ Requires-Dist: langchain-openai<2.0.0,>=1.0.0
21
+ Requires-Dist: langgraph<2.0.0,>=1.0.0
22
+ Requires-Dist: langsmith<1.0.0,>=0.4.0
23
+ Requires-Dist: grandalf<0.9.0,>=0.8.0
24
+ Provides-Extra: anthropic
25
+ Requires-Dist: langchain-anthropic<2.0.0,>=1.0.0; extra == "anthropic"
26
+ Provides-Extra: gemini
27
+ Requires-Dist: langchain-google-genai<5.0.0,>=4.0.0; extra == "gemini"
28
+ Requires-Dist: langchain-google-vertexai<4.0.0,>=3.0.0; extra == "gemini"
29
+ Provides-Extra: groq
30
+ Requires-Dist: langchain-groq<2.0.0,>=1.0.0; extra == "groq"
31
+ Provides-Extra: mistral
32
+ Requires-Dist: langchain-mistralai<2.0.0,>=1.0.0; extra == "mistral"
33
+ Provides-Extra: deepseek
34
+ Requires-Dist: langchain-deepseek<2.0.0,>=1.0.0; extra == "deepseek"
35
+ Provides-Extra: ollama
36
+ Requires-Dist: langchain-ollama<2.0.0,>=1.0.0; extra == "ollama"
37
+ Provides-Extra: all
38
+ Requires-Dist: langchain-anthropic<2.0.0,>=1.0.0; extra == "all"
39
+ Requires-Dist: langchain-google-genai<5.0.0,>=4.0.0; extra == "all"
40
+ Requires-Dist: langchain-google-vertexai<4.0.0,>=3.0.0; extra == "all"
41
+ Requires-Dist: langchain-groq<2.0.0,>=1.0.0; extra == "all"
42
+ Requires-Dist: langchain-mistralai<2.0.0,>=1.0.0; extra == "all"
43
+ Requires-Dist: langchain-deepseek<2.0.0,>=1.0.0; extra == "all"
44
+ Requires-Dist: langchain-ollama<2.0.0,>=1.0.0; extra == "all"
45
+ Provides-Extra: test
46
+ Requires-Dist: Pillow<12.0.0,>=10.0.0; extra == "test"
47
+ Dynamic: author
48
+ Dynamic: author-email
49
+ Dynamic: classifier
50
+ Dynamic: description
51
+ Dynamic: description-content-type
52
+ Dynamic: home-page
53
+ Dynamic: keywords
54
+ Dynamic: project-url
55
+ Dynamic: provides-extra
56
+ Dynamic: requires-dist
57
+ Dynamic: requires-python
58
+ Dynamic: summary
59
+
60
+ # DM-aioaiagent
61
+
62
+ ## Urls
63
+
64
+ * [PyPI](https://pypi.org/project/dm-aioaiagent)
65
+ * [GitHub](https://github.com/MykhLibs/dm-aioaiagent)
66
+
67
+ ### * Package contains both `asynchronous` and `synchronous` clients
68
+
69
+ ## Installation
70
+
71
+ By default, the package ships with **OpenAI** support. Other providers are optional extras:
72
+
73
+ ```bash
74
+ pip install dm-aioaiagent # OpenAI only
75
+ pip install dm-aioaiagent[anthropic] # + Anthropic
76
+ pip install dm-aioaiagent[anthropic,gemini] # several at once
77
+ pip install dm-aioaiagent[all] # every supported provider
78
+ ```
79
+
80
+ Available extras: `anthropic`, `gemini`, `groq`, `mistral`, `deepseek`, `ollama`, `all`.
81
+
82
+ If you call a model from a provider whose package is not installed, `init_chat_model` will raise an `ImportError` with the exact `pip install` command you need.
83
+
84
+ ## Providers
85
+
86
+ Provider resolution is delegated to LangChain's [`init_chat_model`](https://python.langchain.com/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html) — the agent picks the provider automatically by model name prefix when possible. For everything else, use the `"provider:model"` mask.
87
+
88
+ ```python
89
+ # Auto-detected from model prefix (rules come from LangChain's init_chat_model)
90
+ agent = DMAioAIAgent(model="gpt-4o-mini") # → openai
91
+ agent = DMAioAIAgent(model="claude-3-5-sonnet-latest") # → anthropic
92
+ agent = DMAioAIAgent(model="gemini-2.0-flash") # → google_vertexai (see note below)
93
+
94
+ # Explicit provider via "provider:model" mask
95
+ agent = DMAioAIAgent(model="google_genai:gemini-2.0-flash")
96
+ agent = DMAioAIAgent(model="groq:llama-3.1-70b-versatile")
97
+ agent = DMAioAIAgent(model="mistralai:mistral-large-latest")
98
+ agent = DMAioAIAgent(model="deepseek:deepseek-chat")
99
+ agent = DMAioAIAgent(model="ollama:llama3.1")
100
+
101
+ # OpenAI-compatible gateway (OpenRouter, Together, vLLM, LiteLLM proxy, ...)
102
+ # Works without installing any extra — just point to the OpenAI-compatible URL.
103
+ agent = DMAioAIAgent(
104
+ model="meta-llama/llama-3.1-70b-instruct",
105
+ llm_provider_base_url="https://openrouter.ai/api/v1",
106
+ llm_provider_api_key="sk-or-...",
107
+ )
108
+ ```
109
+
110
+ > **Note about Gemini.** LangChain's auto-detect maps the `gemini*` prefix to **`google_vertexai`** (Google Cloud Vertex AI, requires a GCP service account). If you have a regular **Google AI Studio** API key (`GOOGLE_API_KEY`), use the `google_genai:` mask explicitly:
111
+ >
112
+ > ```python
113
+ > agent = DMAioAIAgent(model="google_genai:gemini-2.0-flash")
114
+ > ```
115
+
116
+ Supported provider keys for the `"provider:model"` mask (list inherited from LangChain): `openai`, `anthropic`, `azure_openai`, `azure_ai`, `google_vertexai`, `google_genai`, `bedrock`, `bedrock_converse`, `cohere`, `fireworks`, `together`, `mistralai`, `huggingface`, `groq`, `ollama`, `google_anthropic_vertex`, `deepseek`, `ibm`, `nvidia`, `xai`, `perplexity`.
117
+
118
+ ### Note about parallel tool calls
119
+
120
+ `parallel_tool_calls` is currently mapped only for **OpenAI** and **Anthropic** (their APIs use different formats). For other providers the parameter is silently ignored — extend per-provider mapping if you need it.
121
+
122
+ ## Usage
123
+
124
+ The synchronous counterpart of `DMAioAIAgent` is `DMAIAgent`.
125
+
126
+ ### Windows Setup
127
+
128
+ ```python
129
+ import asyncio
130
+ import sys
131
+
132
+ if sys.platform == "win32":
133
+ asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
134
+ ```
135
+
136
+ ### Api Key Setup
137
+
138
+ Each provider reads its API key from a dedicated environment variable, e.g. `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`, `GROQ_API_KEY`, `MISTRAL_API_KEY`, etc. Alternatively, pass the key explicitly via the `llm_provider_api_key` argument — useful for multi-tenant setups, custom gateways, or runtime key rotation.
139
+
140
+ **Use load_dotenv to load the `.env` file.**
141
+
142
+ ```python
143
+ from dotenv import load_dotenv
144
+ load_dotenv()
145
+ ```
146
+
147
+ ### Use agent *with* inner memory and run *single* message
148
+
149
+ By default, the agent uses its inner memory to store the conversation history.
150
+
151
+ (You can set the *maximum number of messages kept in memory* with the `max_memory_messages` init argument.)
152
+
153
+ ```python
154
+ import asyncio
155
+ from dm_aioaiagent import DMAioAIAgent
156
+
157
+
158
+ async def main():
159
+ # define a system message
160
+ system_message = "Your custom system message with role, backstory and goal"
161
+
162
+ # (optional) define a list of tools, if you want to use them
163
+ tools = [...]
164
+
165
+ # define an OpenAI model, default is "gpt-4o-mini"
166
+ model_name = "gpt-4o"
167
+
168
+ # create an agent
169
+ ai_agent = DMAioAIAgent(system_message, tools, model=model_name)
170
+ # if you don't want to see the input and output messages from agent
171
+ # you can set `input_output_logging=False` init argument
172
+
173
+ # call an agent
174
+ answer = await ai_agent.run("Hello!")
175
+
176
+ # call an agent
177
+ answer = await ai_agent.run("I want to know the weather in Kyiv")
178
+
179
+ # get full conversation history
180
+ conversation_history = ai_agent.memory_messages
181
+
182
+ # clear conversation history
183
+ ai_agent.clear_memory_messages()
184
+
185
+
186
+ if __name__ == "__main__":
187
+ asyncio.run(main())
188
+ ```
189
+
190
+ ### Use agent *without* inner memory and run *multiple* messages
191
+
192
+ If you want to control the memory of the agent, you can disable it by setting `is_memory_enabled=False`
193
+
194
+ ```python
195
+ import asyncio
196
+ from dm_aioaiagent import DMAioAIAgent
197
+
198
+
199
+ async def main():
200
+ # define a system message
201
+ system_message = "Your custom system message with role, backstory and goal"
202
+
203
+ # (optional) define a list of tools, if you want to use them
204
+ tools = [...]
205
+
206
+ # define an OpenAI model, default is "gpt-4o-mini"
207
+ model_name = "gpt-4o"
208
+
209
+ # create an agent
210
+ ai_agent = DMAioAIAgent(system_message, tools, model=model_name,
211
+ is_memory_enabled=False)
212
+ # if you don't want to see the input and output messages from agent
213
+ # you can set input_output_logging=False
214
+
215
+ # define the conversation message(s)
216
+ messages = [
217
+ {"role": "user", "content": "Hello!"}
218
+ ]
219
+
220
+ # call an agent
221
+ new_messages = await ai_agent.run_messages(messages)
222
+
223
+ # add new_messages to messages
224
+ messages.extend(new_messages)
225
+
226
+ # define the next conversation message
227
+ messages.append(
228
+ {"role": "user", "content": "I want to know the weather in Kyiv"}
229
+ )
230
+
231
+ # call an agent
232
+ new_messages = await ai_agent.run_messages(messages)
233
+
234
+
235
+ if __name__ == "__main__":
236
+ asyncio.run(main())
237
+ ```
238
+
239
+ ### Working with images — input
240
+
241
+ Use the `InputImage` helper to attach an image to a user message in a way that works **across providers** (OpenAI, Anthropic, Gemini). Each factory returns a ready-to-send `HumanMessage` whose `.content` is a list of LangChain v1 standard content blocks.
242
+
243
+ ```python
244
+ from dm_aioaiagent import DMAIAgent, InputImage
245
+
246
+ agent = DMAIAgent(agent_name="image_vision", model="gpt-4o-mini")
247
+
248
+ # from a local file (mime type inferred from extension)
249
+ msg_file = InputImage.from_file("photo.png", text="What is in the picture?")
250
+
251
+ # from a remote URL
252
+ msg_url = InputImage.from_url("https://your.domain/image.png", text="Describe it.")
253
+
254
+ # from raw bytes / base64 (mime_type required)
255
+ with open("photo.png", "rb") as f:
256
+ msg_bytes = InputImage.from_bytes(f.read(), mime_type="image/png", text="Describe.")
257
+ msg_b64 = InputImage.from_base64("aGVsbG8=", mime_type="image/png")
258
+
259
+ answer = agent.run_messages([msg_file])
260
+ print(answer[-1].content_blocks) # list of standard blocks
261
+ ```
262
+
263
+ **Multiple images per turn.** Each factory builds **one** image message. To attach several images to a single user turn, pass several messages:
264
+
265
+ ```python
266
+ messages = [
267
+ InputImage.from_file("front.png", text="Compare these two views:"),
268
+ InputImage.from_file("back.png"),
269
+ ]
270
+ agent.run_messages(messages)
271
+ ```
272
+
273
+ > **`from_url` caveats.** Some providers (notably Anthropic and Gemini) may have stricter rules about remote URLs (allowed hosts, public reachability, redirects). When in doubt — read the file yourself and use `from_file` / `from_bytes`.
274
+
275
+ ### Image generation and edit
276
+
277
+ The agent can also produce images. The mechanism differs by provider, so two flavours of model are supported (OpenAI and Gemini, both described below).
278
+
279
+ `enable_image_generation` is the **single master switch** for image output across providers — image generation is off by default, and you opt in with one flag. The flag's effect is provider-specific (different APIs underneath), but the semantics are uniform: turn it on → the agent can draw, leave it off → it can't.
280
+
281
+ #### OpenAI — `enable_image_generation=True`
282
+
283
+ Pass the flag to a normal chat-capable OpenAI model (`gpt-5`, `gpt-5-mini`, etc.). Under the hood the agent enables the **Responses API** and binds OpenAI's built-in `image_generation` tool — the model decides on its own when to call it. Plain text turns stay text.
284
+
285
+ ```python
286
+ from dm_aioaiagent import DMAIAgent, OutputImage
287
+
288
+ agent = DMAIAgent(model="gpt-5-mini", enable_image_generation=True)
289
+
290
+ agent.run("Draw a small red square on a white background.")
291
+
292
+ # Generated images surface on agent.images
293
+ for i, img in enumerate(agent.images):
294
+ img.save(f"out_{i}.png")
295
+ ```
296
+
297
+ The same flag can be combined with regular tools — they coexist. `enable_image_generation=True` is **safe** even when the user only asks for text: the model uses `tool_choice="auto"`.
298
+
299
+ > Older OpenAI models (`gpt-4o`, `gpt-4.1`, etc.) require **organization verification** at platform.openai.com before they will accept the `image_generation` tool. The `gpt-5` family works on a fresh API key without verification.
300
+
301
+ #### Gemini — image-output models + the same flag
302
+
303
+ For Gemini you pick a model whose name contains `image` — e.g. `gemini-2.5-flash-image` (Nano Banana) — **and** turn the flag on. The agent then injects `response_modalities=["IMAGE", "TEXT"]` so the model is allowed to draw.
304
+
305
+ ```python
306
+ agent = DMAIAgent(
307
+ model="google_genai:gemini-2.5-flash-image",
308
+ enable_image_generation=True,
309
+ )
310
+
311
+ agent.run("Generate a small red square.")
312
+ agent.images[0].save("out.png")
313
+ ```
314
+
315
+ If you pick a Gemini image model but forget the flag, the agent logs a warning (`"... is image-capable but enable_image_generation=False — set the flag to True to let it draw."`) and stays in text-only mode.
316
+
317
+ > **Heads up.** A Gemini image-output model is **not** a general chat model — it tends to draw on every turn, including plain greetings. For mixed workloads use a **two-agent pattern**: a chat agent with the image agent attached as a tool. See [`agent.as_tool()`](#agentas_tool) below.
318
+
319
+ #### Anthropic — vision only
320
+
321
+ Claude **cannot generate** images. If you pass `enable_image_generation=True` to a Claude model, the flag is ignored and a warning is logged. Image input (vision) works as usual.
322
+
323
+ ### Working with generated images — `OutputImage`
324
+
325
+ Generated images live in `agent.images` as `OutputImage` instances:
326
+
327
+ ```python
328
+ img = agent.images[0]
329
+ img.bytes # raw image bytes
330
+ img.mime_type # e.g. "image/png"
331
+ img.save("out.png")
332
+ img.to_base64()
333
+ ```
334
+
335
+ You can also extract images directly from any `AIMessage`:
336
+
337
+ ```python
338
+ from dm_aioaiagent import OutputImage
339
+ images = OutputImage.extract_from(response_message) # list[OutputImage]
340
+ ```
341
+
342
+ ### Image memory modes
343
+
344
+ Images in `agent.memory_messages` (the conversation history sent to the LLM on each turn) and in `agent.images` (the property exposing AI-generated images) follow the `image_memory_mode` constructor argument:
345
+
346
+ | Mode | Memory (history) | `agent.images` |
347
+ |---|---|---|
348
+ | `keep_last` *(default)* | last user-image kept; last AI-image kept; older → `[image]` / `[generated image]` placeholder | last AI-image kept; replaced when a new one arrives |
349
+ | `drop` | every image (user + AI) becomes a placeholder right after the turn | only the AI-image of the **current** turn (then wiped on the next call) |
350
+ | `keep_all` | nothing is stripped — full multimodal history | every AI-image accumulates |
351
+
352
+ ```python
353
+ agent = DMAIAgent(model="gpt-4o-mini", image_memory_mode="keep_last")
354
+ agent.run_messages([InputImage.from_file("photo.png", text="Describe.")])
355
+ agent.run("What colour was dominant?") # answers based on the image
356
+ ```
357
+
358
+ `agent.clear_memory_messages()` clears both `memory_messages` and `images`.
359
+
360
+ > Only **AI-generated** images populate `agent.images`. Images you upload via `InputImage` go into history per the rules above but are not exposed on the `images` property.
361
+
362
+ ### `agent.as_tool()`
363
+
364
+ Wrap any agent as a `StructuredTool` so a *parent* agent can call it like any other tool — the basis for multi-agent composition. Default name is derived from `agent_name` (lowercased, non-alphanumerics replaced with `_`); `description` is required.
365
+
366
+ ```python
367
+ from dm_aioaiagent import DMAIAgent
368
+
369
+ # specialised image agent
370
+ image_agent = DMAIAgent(
371
+ agent_name="image_drawer",
372
+ model="google_genai:gemini-2.5-flash-image",
373
+ enable_image_generation=True,
374
+ )
375
+
376
+ # chat agent that delegates drawing to the image agent
377
+ chat_agent = DMAIAgent(
378
+ model="google_genai:gemini-2.5-flash",
379
+ tools=[image_agent.as_tool(description="Generates an image from a text prompt.")],
380
+ )
381
+
382
+ chat_agent.run("Hi! Please draw a small red square.")
383
+ # the chat agent picks the tool, the image agent draws, image lands in image_agent.images
384
+ image_agent.images[0].save("out.png")
385
+ ```
386
+
387
+ The async client (`DMAioAIAgent.as_tool`) returns a tool with both `func` and `coroutine` set, so it can be invoked from sync or async parent agents.
@@ -0,0 +1,10 @@
1
+ dm_aioaiagent/__init__.py,sha256=HSrPWMzO3tl0Yvmrl__1ZO80hZ0ClHoUzTJbCNcclnE,147
2
+ dm_aioaiagent/ai_agent.py,sha256=TQLlTAM9AZInTnmqDqVeiwYBofYHMon1BEJ0OC54Tv0,22551
3
+ dm_aioaiagent/async_ai_agent.py,sha256=kKXTXu32O-nM98_zVynJB52rOU7TxQcgyuO3FvFqNKM,5808
4
+ dm_aioaiagent/input_image.py,sha256=Rb2VXgxwmZhUrMbSL3giOF2upOtD2SI7fpFYNNx7iWE,2399
5
+ dm_aioaiagent/output_image.py,sha256=V-MRYqUjC30udxPEAHU95bYWapuV7CaPq0DbcMeXzuo,2165
6
+ dm_aioaiagent/types.py,sha256=AvcxvT3YgDoq-9xfuELceGeVcltyONEBpgUMk3CEHVw,720
7
+ dm_aioaiagent-0.7.0.dist-info/METADATA,sha256=n48XBgmbsbZRTVNZU54v0MdTEa3cw1kkBCY84Ql3MhU,15583
8
+ dm_aioaiagent-0.7.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
9
+ dm_aioaiagent-0.7.0.dist-info/top_level.txt,sha256=CbasLH0KI7zA77XwT6JDCnmRascxKNGvUVV9MgYjHAU,14
10
+ dm_aioaiagent-0.7.0.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- class OpenAIImageMessageContent(list):
2
- def __init__(self, image_url: str, text: str = None):
3
- content = []
4
- if isinstance(text, str):
5
- content.append({
6
- "type": "text",
7
- "text": text
8
- })
9
- content.append({
10
- "type": "image_url",
11
- "image_url": {
12
- "url": image_url
13
- }
14
- })
15
- super().__init__(content)
@@ -1,264 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: dm-aioaiagent
3
- Version: 0.6.0
4
- Summary: This is my custom aioaiagent client
5
- Home-page: https://pypi.org/project/dm-aioaiagent
6
- Author: dimka4621
7
- Author-email: mismartconfig@gmail.com
8
- Project-URL: GitHub, https://github.com/MykhLibs/dm-aioaiagent
9
- Keywords: dm aioaiagent
10
- Classifier: Programming Language :: Python :: 3.9
11
- Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Operating System :: OS Independent
13
- Requires-Python: >=3.9
14
- Description-Content-Type: text/markdown
15
- Requires-Dist: dm-logger<0.7.0,>=0.6.6
16
- Requires-Dist: python-dotenv>=1.0.0
17
- Requires-Dist: pydantic<3.0.0,>=2.9.2
18
- Requires-Dist: langchain<0.4.0,>=0.3.0
19
- Requires-Dist: langchain-core<0.4.0,>=0.3.5
20
- Requires-Dist: langchain-community<0.4.0,>=0.3.0
21
- Requires-Dist: langchain-openai<0.4.0,>=0.3.0
22
- Requires-Dist: langgraph<0.4.0,>=0.3.23
23
- Requires-Dist: langsmith<0.4.0,>=0.3.45
24
- Requires-Dist: grandalf<0.9.0,>=0.8.0
25
- Provides-Extra: anthropic
26
- Requires-Dist: langchain-anthropic<0.4.0,>=0.3.0; extra == "anthropic"
27
- Provides-Extra: gemini
28
- Requires-Dist: langchain-google-genai<3.0.0,>=2.1.0; extra == "gemini"
29
- Provides-Extra: groq
30
- Requires-Dist: langchain-groq<0.4.0,>=0.3.0; extra == "groq"
31
- Provides-Extra: mistral
32
- Requires-Dist: langchain-mistralai<0.3.0,>=0.2.0; extra == "mistral"
33
- Provides-Extra: deepseek
34
- Requires-Dist: langchain-deepseek<0.2.0,>=0.1.0; extra == "deepseek"
35
- Provides-Extra: ollama
36
- Requires-Dist: langchain-ollama<0.4.0,>=0.3.0; extra == "ollama"
37
- Provides-Extra: all
38
- Requires-Dist: langchain-anthropic<0.4.0,>=0.3.0; extra == "all"
39
- Requires-Dist: langchain-google-genai<3.0.0,>=2.1.0; extra == "all"
40
- Requires-Dist: langchain-groq<0.4.0,>=0.3.0; extra == "all"
41
- Requires-Dist: langchain-mistralai<0.3.0,>=0.2.0; extra == "all"
42
- Requires-Dist: langchain-deepseek<0.2.0,>=0.1.0; extra == "all"
43
- Requires-Dist: langchain-ollama<0.4.0,>=0.3.0; extra == "all"
44
- Dynamic: author
45
- Dynamic: author-email
46
- Dynamic: classifier
47
- Dynamic: description
48
- Dynamic: description-content-type
49
- Dynamic: home-page
50
- Dynamic: keywords
51
- Dynamic: project-url
52
- Dynamic: provides-extra
53
- Dynamic: requires-dist
54
- Dynamic: requires-python
55
- Dynamic: summary
56
-
57
- # DM-aioaiagent
58
-
59
- ## Urls
60
-
61
- * [PyPI](https://pypi.org/project/dm-aioaiagent)
62
- * [GitHub](https://github.com/MykhLibs/dm-aioaiagent)
63
-
64
- ### * Package contains both `asynchronous` and `synchronous` clients
65
-
66
- ## Installation
67
-
68
- By default, the package ships with **OpenAI** support. Other providers are optional extras:
69
-
70
- ```bash
71
- pip install dm-aioaiagent # OpenAI only
72
- pip install dm-aioaiagent[anthropic] # + Anthropic
73
- pip install dm-aioaiagent[anthropic,gemini] # several at once
74
- pip install dm-aioaiagent[all] # every supported provider
75
- ```
76
-
77
- Available extras: `anthropic`, `gemini`, `groq`, `mistral`, `deepseek`, `ollama`, `all`.
78
-
79
- If you call a model from a provider whose package is not installed, `init_chat_model` will raise an `ImportError` with the exact `pip install` command you need.
80
-
81
- ## Providers
82
-
83
- Provider resolution is delegated to LangChain's [`init_chat_model`](https://python.langchain.com/api_reference/langchain/chat_models/langchain.chat_models.base.init_chat_model.html) — the agent picks the provider automatically by model name prefix when possible. For everything else, use the `"provider:model"` mask.
84
-
85
- ```python
86
- # Auto-detected from model prefix (rules come from LangChain's init_chat_model)
87
- agent = DMAioAIAgent(model="gpt-4o-mini") # → openai
88
- agent = DMAioAIAgent(model="claude-3-5-sonnet-latest") # → anthropic
89
- agent = DMAioAIAgent(model="gemini-2.0-flash") # → google_vertexai (see note below)
90
-
91
- # Explicit provider via "provider:model" mask
92
- agent = DMAioAIAgent(model="google_genai:gemini-2.0-flash")
93
- agent = DMAioAIAgent(model="groq:llama-3.1-70b-versatile")
94
- agent = DMAioAIAgent(model="mistralai:mistral-large-latest")
95
- agent = DMAioAIAgent(model="deepseek:deepseek-chat")
96
- agent = DMAioAIAgent(model="ollama:llama3.1")
97
-
98
- # OpenAI-compatible gateway (OpenRouter, Together, vLLM, LiteLLM proxy, ...)
99
- # Works without installing any extra — just point to the OpenAI-compatible URL.
100
- agent = DMAioAIAgent(
101
- model="meta-llama/llama-3.1-70b-instruct",
102
- llm_provider_base_url="https://openrouter.ai/api/v1",
103
- llm_provider_api_key="sk-or-...",
104
- )
105
- ```
106
-
107
- > **Note about Gemini.** LangChain's auto-detect maps the `gemini*` prefix to **`google_vertexai`** (Google Cloud Vertex AI, requires a GCP service account). If you have a regular **Google AI Studio** API key (`GOOGLE_API_KEY`), use the `google_genai:` mask explicitly:
108
- >
109
- > ```python
110
- > agent = DMAioAIAgent(model="google_genai:gemini-2.0-flash")
111
- > ```
112
-
113
- Supported provider keys for the `"provider:model"` mask (list inherited from LangChain): `openai`, `anthropic`, `azure_openai`, `azure_ai`, `google_vertexai`, `google_genai`, `bedrock`, `bedrock_converse`, `cohere`, `fireworks`, `together`, `mistralai`, `huggingface`, `groq`, `ollama`, `google_anthropic_vertex`, `deepseek`, `ibm`, `nvidia`, `xai`, `perplexity`.
114
-
115
- ### Note about parallel tool calls
116
-
117
- `parallel_tool_calls` is currently mapped only for **OpenAI** and **Anthropic** (their APIs use different formats). For other providers the parameter is silently ignored — extend per-provider mapping if you need it.
118
-
119
- ## Usage
120
-
121
- Analogue to `DMAioAIAgent` is the synchronous client `DMAIAgent`.
122
-
123
- ### Windows Setup
124
-
125
- ```python
126
- import asyncio
127
- import sys
128
-
129
- if sys.platform == "win32":
130
- asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
131
- ```
132
-
133
- ### Api Key Setup
134
-
135
- Each provider reads its API key from a dedicated environment variable, e.g. `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GOOGLE_API_KEY`, `GROQ_API_KEY`, `MISTRAL_API_KEY`, etc. Alternatively, pass the key explicitly via the `llm_provider_api_key` argument — useful for multi-tenant setups, custom gateways, or runtime key rotation.
136
-
137
- **Use load_dotenv to load the `.env` file.**
138
-
139
- ```python
140
- from dotenv import load_dotenv
141
- load_dotenv()
142
- ```
143
-
144
- ### Use agent *with* inner memory and run *single* message
145
-
146
- By default, agent use inner memory to store the conversation history.
147
-
148
- (You can set *max count messages in memory* by `max_memory_messages` init argument)
149
-
150
- ```python
151
- import asyncio
152
- from dm_aioaiagent import DMAioAIAgent
153
-
154
-
155
- async def main():
156
- # define a system message
157
- system_message = "Your custom system message with role, backstory and goal"
158
-
159
- # (optional) define a list of tools, if you want to use them
160
- tools = [...]
161
-
162
- # define a openai model, default is "gpt-4o-mini"
163
- model_name = "gpt-4o"
164
-
165
- # create an agent
166
- ai_agent = DMAioAIAgent(system_message, tools, model=model_name)
167
- # if you don't want to see the input and output messages from agent
168
- # you can set `input_output_logging=False` init argument
169
-
170
- # call an agent
171
- answer = await ai_agent.run("Hello!")
172
-
173
- # call an agent
174
- answer = await ai_agent.run("I want to know the weather in Kyiv")
175
-
176
- # get full conversation history
177
- conversation_history = ai_agent.memory_messages
178
-
179
- # clear conversation history
180
- ai_agent.clear_memory_messages()
181
-
182
-
183
- if __name__ == "__main__":
184
- asyncio.run(main())
185
- ```
186
-
187
- ### Use agent *without* inner memory and run *multiple* messages
188
-
189
- If you want to control the memory of the agent, you can disable it by setting `is_memory_enabled=False`
190
-
191
- ```python
192
- import asyncio
193
- from dm_aioaiagent import DMAioAIAgent
194
-
195
-
196
- async def main():
197
- # define a system message
198
- system_message = "Your custom system message with role, backstory and goal"
199
-
200
- # (optional) define a list of tools, if you want to use them
201
- tools = [...]
202
-
203
- # define a openai model, default is "gpt-4o-mini"
204
- model_name = "gpt-4o"
205
-
206
- # create an agent
207
- ai_agent = DMAioAIAgent(system_message, tools, model=model_name,
208
- is_memory_enabled=False)
209
- # if you don't want to see the input and output messages from agent
210
- # you can set input_output_logging=False
211
-
212
- # define the conversation message(s)
213
- messages = [
214
- {"role": "user", "content": "Hello!"}
215
- ]
216
-
217
- # call an agent
218
- new_messages = await ai_agent.run_messages(messages)
219
-
220
- # add new_messages to messages
221
- messages.extend(new_messages)
222
-
223
- # define the next conversation message
224
- messages.append(
225
- {"role": "user", "content": "I want to know the weather in Kyiv"}
226
- )
227
-
228
- # call an agent
229
- new_messages = await ai_agent.run_messages(messages)
230
-
231
-
232
- if __name__ == "__main__":
233
- asyncio.run(main())
234
- ```
235
-
236
- ### Image vision
237
-
238
- ```python
239
- from dm_aioaiagent import DMAIAgent, OpenAIImageMessageContent
240
-
241
-
242
- def main():
243
- # create an agent
244
- ai_agent = DMAIAgent(agent_name="image_vision", model="gpt-4o")
245
-
246
- # create an image message content
247
- # NOTE: text argument is optional
248
- img_content = OpenAIImageMessageContent(image_url="https://your.domain/image",
249
- text="Hello, what is shown in the photo?")
250
-
251
- # define the conversation messages
252
- messages = [
253
- {"role": "user", "content": "Hello!"},
254
- {"role": "user", "content": img_content},
255
- ]
256
-
257
- # call an agent
258
- new_messages = ai_agent.run_messages(messages)
259
- answer = new_messages[-1].content
260
-
261
-
262
- if __name__ == "__main__":
263
- main()
264
- ```
@@ -1,9 +0,0 @@
1
- dm_aioaiagent/__init__.py,sha256=VuBGUpDb9woPGO989Otb54ngJSuDzKJOjxy7s3V3V5M,141
2
- dm_aioaiagent/ai_agent.py,sha256=gZpyfFinYEQ6JgfMqfRvXtTtbkt_goJmtIb0L_fWYDU,13960
3
- dm_aioaiagent/async_ai_agent.py,sha256=zNw_4lkzQMmDdwhhrgWQ5ZuzqsOvj5KDrARLTY_dgYo,4866
4
- dm_aioaiagent/openai_image_message_content.py,sha256=EP_i0ERCz7c4KOM8UXp2-AI91ntGC7PGQBc5MMNspcs,434
5
- dm_aioaiagent/types.py,sha256=XKnKLRPiFGmoMq3vjoV6fxWZ8BiHilM4jvkQU1dO_2c,1036
6
- dm_aioaiagent-0.6.0.dist-info/METADATA,sha256=muLKMEvd4h2Cy3fvxqhJN8bA0pycPlTFAbGVrQ2aPBM,9214
7
- dm_aioaiagent-0.6.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
8
- dm_aioaiagent-0.6.0.dist-info/top_level.txt,sha256=CbasLH0KI7zA77XwT6JDCnmRascxKNGvUVV9MgYjHAU,14
9
- dm_aioaiagent-0.6.0.dist-info/RECORD,,