ag2-0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112)
  1. ag2-0.3.2.dist-info/LICENSE +201 -0
  2. ag2-0.3.2.dist-info/METADATA +490 -0
  3. ag2-0.3.2.dist-info/NOTICE.md +19 -0
  4. ag2-0.3.2.dist-info/RECORD +112 -0
  5. ag2-0.3.2.dist-info/WHEEL +5 -0
  6. ag2-0.3.2.dist-info/top_level.txt +1 -0
  7. autogen/__init__.py +17 -0
  8. autogen/_pydantic.py +116 -0
  9. autogen/agentchat/__init__.py +26 -0
  10. autogen/agentchat/agent.py +142 -0
  11. autogen/agentchat/assistant_agent.py +85 -0
  12. autogen/agentchat/chat.py +306 -0
  13. autogen/agentchat/contrib/__init__.py +0 -0
  14. autogen/agentchat/contrib/agent_builder.py +785 -0
  15. autogen/agentchat/contrib/agent_optimizer.py +450 -0
  16. autogen/agentchat/contrib/capabilities/__init__.py +0 -0
  17. autogen/agentchat/contrib/capabilities/agent_capability.py +21 -0
  18. autogen/agentchat/contrib/capabilities/generate_images.py +297 -0
  19. autogen/agentchat/contrib/capabilities/teachability.py +406 -0
  20. autogen/agentchat/contrib/capabilities/text_compressors.py +72 -0
  21. autogen/agentchat/contrib/capabilities/transform_messages.py +92 -0
  22. autogen/agentchat/contrib/capabilities/transforms.py +565 -0
  23. autogen/agentchat/contrib/capabilities/transforms_util.py +120 -0
  24. autogen/agentchat/contrib/capabilities/vision_capability.py +217 -0
  25. autogen/agentchat/contrib/gpt_assistant_agent.py +545 -0
  26. autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
  27. autogen/agentchat/contrib/graph_rag/document.py +24 -0
  28. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +76 -0
  29. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +50 -0
  30. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +56 -0
  31. autogen/agentchat/contrib/img_utils.py +390 -0
  32. autogen/agentchat/contrib/llamaindex_conversable_agent.py +114 -0
  33. autogen/agentchat/contrib/llava_agent.py +176 -0
  34. autogen/agentchat/contrib/math_user_proxy_agent.py +471 -0
  35. autogen/agentchat/contrib/multimodal_conversable_agent.py +128 -0
  36. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
  37. autogen/agentchat/contrib/retrieve_assistant_agent.py +56 -0
  38. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +701 -0
  39. autogen/agentchat/contrib/society_of_mind_agent.py +203 -0
  40. autogen/agentchat/contrib/text_analyzer_agent.py +76 -0
  41. autogen/agentchat/contrib/vectordb/__init__.py +0 -0
  42. autogen/agentchat/contrib/vectordb/base.py +243 -0
  43. autogen/agentchat/contrib/vectordb/chromadb.py +326 -0
  44. autogen/agentchat/contrib/vectordb/mongodb.py +559 -0
  45. autogen/agentchat/contrib/vectordb/pgvectordb.py +958 -0
  46. autogen/agentchat/contrib/vectordb/qdrant.py +334 -0
  47. autogen/agentchat/contrib/vectordb/utils.py +126 -0
  48. autogen/agentchat/contrib/web_surfer.py +305 -0
  49. autogen/agentchat/conversable_agent.py +2904 -0
  50. autogen/agentchat/groupchat.py +1666 -0
  51. autogen/agentchat/user_proxy_agent.py +109 -0
  52. autogen/agentchat/utils.py +207 -0
  53. autogen/browser_utils.py +291 -0
  54. autogen/cache/__init__.py +10 -0
  55. autogen/cache/abstract_cache_base.py +78 -0
  56. autogen/cache/cache.py +182 -0
  57. autogen/cache/cache_factory.py +85 -0
  58. autogen/cache/cosmos_db_cache.py +150 -0
  59. autogen/cache/disk_cache.py +109 -0
  60. autogen/cache/in_memory_cache.py +61 -0
  61. autogen/cache/redis_cache.py +128 -0
  62. autogen/code_utils.py +745 -0
  63. autogen/coding/__init__.py +22 -0
  64. autogen/coding/base.py +113 -0
  65. autogen/coding/docker_commandline_code_executor.py +262 -0
  66. autogen/coding/factory.py +45 -0
  67. autogen/coding/func_with_reqs.py +203 -0
  68. autogen/coding/jupyter/__init__.py +22 -0
  69. autogen/coding/jupyter/base.py +32 -0
  70. autogen/coding/jupyter/docker_jupyter_server.py +164 -0
  71. autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
  72. autogen/coding/jupyter/jupyter_client.py +224 -0
  73. autogen/coding/jupyter/jupyter_code_executor.py +161 -0
  74. autogen/coding/jupyter/local_jupyter_server.py +168 -0
  75. autogen/coding/local_commandline_code_executor.py +410 -0
  76. autogen/coding/markdown_code_extractor.py +44 -0
  77. autogen/coding/utils.py +57 -0
  78. autogen/exception_utils.py +46 -0
  79. autogen/extensions/__init__.py +0 -0
  80. autogen/formatting_utils.py +76 -0
  81. autogen/function_utils.py +362 -0
  82. autogen/graph_utils.py +148 -0
  83. autogen/io/__init__.py +15 -0
  84. autogen/io/base.py +105 -0
  85. autogen/io/console.py +43 -0
  86. autogen/io/websockets.py +213 -0
  87. autogen/logger/__init__.py +11 -0
  88. autogen/logger/base_logger.py +140 -0
  89. autogen/logger/file_logger.py +287 -0
  90. autogen/logger/logger_factory.py +29 -0
  91. autogen/logger/logger_utils.py +42 -0
  92. autogen/logger/sqlite_logger.py +459 -0
  93. autogen/math_utils.py +356 -0
  94. autogen/oai/__init__.py +33 -0
  95. autogen/oai/anthropic.py +428 -0
  96. autogen/oai/bedrock.py +600 -0
  97. autogen/oai/cerebras.py +264 -0
  98. autogen/oai/client.py +1148 -0
  99. autogen/oai/client_utils.py +167 -0
  100. autogen/oai/cohere.py +453 -0
  101. autogen/oai/completion.py +1216 -0
  102. autogen/oai/gemini.py +469 -0
  103. autogen/oai/groq.py +281 -0
  104. autogen/oai/mistral.py +279 -0
  105. autogen/oai/ollama.py +576 -0
  106. autogen/oai/openai_utils.py +810 -0
  107. autogen/oai/together.py +343 -0
  108. autogen/retrieve_utils.py +487 -0
  109. autogen/runtime_logging.py +163 -0
  110. autogen/token_count_utils.py +257 -0
  111. autogen/types.py +20 -0
  112. autogen/version.py +7 -0
autogen/agentchat/contrib/capabilities/vision_capability.py (new file, entry 24 above):
@@ -0,0 +1,217 @@
+ # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+ #
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
+ # SPDX-License-Identifier: MIT
+ import copy
+ from typing import Callable, Dict, List, Optional, Union
+
+ from autogen.agentchat.assistant_agent import ConversableAgent
+ from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
+ from autogen.agentchat.contrib.img_utils import (
+     convert_base64_to_data_uri,
+     get_image_data,
+     get_pil_image,
+     gpt4v_formatter,
+     message_formatter_pil_to_b64,
+ )
+ from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent
+ from autogen.agentchat.conversable_agent import colored
+ from autogen.code_utils import content_str
+ from autogen.oai.client import OpenAIWrapper
+
+ DEFAULT_DESCRIPTION_PROMPT = (
+     "Write a detailed caption for this image. "
+     "Pay special attention to any details that might be useful or relevant "
+     "to the ongoing conversation."
+ )
+
+
+ class VisionCapability(AgentCapability):
+     """Adds vision capability to a regular ConversableAgent, even if that agent has no
+     multimodal support of its own (e.g., a GPT-3.5-turbo, Llama, Orca, or Mistral agent).
+     This vision capability invokes an LMM client to describe (caption) each image before
+     the information is sent to the agent's actual client.
+
+     The vision capability hooks into the ConversableAgent's `process_last_received_message`.
+
+     Some technical details:
+     When an agent with this vision capability receives a message, it will:
+     1. _process_received_message:
+         a. _append_oai_message
+     2. generate_reply: if the agent is a MultimodalAgent, it will also use the image tag.
+         a. hook process_last_received_message (NOTE: this is where the vision capability is hooked in.)
+         b. hook process_all_messages_before_reply
+     3. send:
+         a. hook process_message_before_send
+         b. _append_oai_message
+     """
+
+     def __init__(
+         self,
+         lmm_config: Dict,
+         description_prompt: Optional[str] = DEFAULT_DESCRIPTION_PROMPT,
+         custom_caption_func: Optional[Callable] = None,
+     ) -> None:
+         """
+         Initializes a new instance, setting up the configuration for interacting with
+         a large multimodal model (LMM) client and specifying optional parameters for
+         image description and captioning.
+
+         Args:
+             lmm_config (Dict): Configuration for the LMM client, which is used to call
+                 the LMM service for describing the image. This must be a dictionary containing
+                 the necessary configuration parameters. If `lmm_config` is False or an empty
+                 dictionary, it is considered invalid and initialization raises an AssertionError.
+             description_prompt (Optional[str], optional): The prompt used for generating
+                 image descriptions. This parameter allows customization of the prompt passed
+                 to the LMM service. Defaults to `DEFAULT_DESCRIPTION_PROMPT`.
+             custom_caption_func (Optional[Callable], optional): A callable that, if provided,
+                 is used to generate captions for images. This allows for custom captioning
+                 logic outside of the standard LMM service interaction.
+                 The callable should take three parameters as input:
+                 1. an image URL (or local location)
+                 2. image_data (a PIL image)
+                 3. lmm_client (to call a remote LMM)
+                 and then return a description (as a string).
+                 If not provided, captioning relies on the LMM client configured via `lmm_config`.
+                 If provided, the default `self._get_image_caption` method is not used.
+
+         Raises:
+             AssertionError: If neither a valid `lmm_config` nor a `custom_caption_func` is
+                 provided, since the Vision Capability requires one of them to operate.
+         """
+         self._lmm_config = lmm_config
+         self._description_prompt = description_prompt
+         self._parent_agent = None
+
+         if lmm_config:
+             self._lmm_client = OpenAIWrapper(**lmm_config)
+         else:
+             self._lmm_client = None
+
+         self._custom_caption_func = custom_caption_func
+         assert (
+             self._lmm_config or custom_caption_func
+         ), "Vision Capability requires a valid lmm_config or custom_caption_func."
+
+     def add_to_agent(self, agent: ConversableAgent) -> None:
+         self._parent_agent = agent
+
+         # Append extra info to the system message.
+         agent.update_system_message(agent.system_message + "\nYou've been given the ability to interpret images.")
+
+         # Register a hook for processing the last message.
+         agent.register_hook(hookable_method="process_last_received_message", hook=self.process_last_received_message)
+
+     def process_last_received_message(self, content: Union[str, List[dict]]) -> str:
+         """
+         Processes the last received message content by normalizing and augmenting it
+         with descriptions of any included images. The function supports input content
+         as either a string or a list of dictionaries, where each dictionary represents
+         a content item (e.g., text, image). If the content contains image URLs, it
+         fetches the image data, generates a caption for each image, and inserts the
+         caption into the augmented content.
+
+         The function aims to transform the content into a format compatible with GPT-4V
+         multimodal inputs, specifically by formatting strings into PIL-compatible
+         images if needed and appending text descriptions for images. This allows for
+         a more accessible presentation of the content, especially in contexts where
+         images cannot be displayed directly.
+
+         Args:
+             content (Union[str, List[dict]]): The last received message content, which
+                 can be a plain text string or a list of dictionaries representing
+                 different types of content items (e.g., text, image_url).
+
+         Returns:
+             str: The augmented message content.
+
+         Raises:
+             AssertionError: If an item in the content list is not a dictionary.
+
+         Examples:
+             Assuming `self._get_image_caption(img_data)` returns
+             "A beautiful sunset over the mountains" for the image.
+
+             - Input as string:
+                 content = "Check out this cool photo!"
+                 Output: "Check out this cool photo!"
+                 (Content is a string without an image; it remains unchanged.)
+
+             - Input as string, with image location:
+                 content = "What's the weather in this cool photo: <img http://example.com/photo.jpg>"
+                 Output: "What's the weather in this cool photo: <img http://example.com/photo.jpg> in case you cannot see, the caption of this image is:
+                 A beautiful sunset over the mountains\n"
+                 (Caption added after the image.)
+
+             - Input as list with text only:
+                 content = [{"type": "text", "text": "Here's an interesting fact."}]
+                 Output: "Here's an interesting fact."
+                 (No images in the content; it remains unchanged.)
+
+             - Input as list with image URL:
+                 content = [
+                     {"type": "text", "text": "What's the weather in this cool photo:"},
+                     {"type": "image_url", "image_url": {"url": "http://example.com/photo.jpg"}}
+                 ]
+                 Output: "What's the weather in this cool photo: <img http://example.com/photo.jpg> in case you cannot see, the caption of this image is:
+                 A beautiful sunset over the mountains\n"
+                 (Caption added after the image.)
+         """
+         # Work on a copy so the caller's message content is not mutated.
+         content = copy.deepcopy(content)
+         # Normalize the content into the gpt-4v format for multimodal,
+         # keeping the URL format to stay concise.
+         if isinstance(content, str):
+             content = gpt4v_formatter(content, img_format="url")
+
+         aug_content: str = ""
+         for item in content:
+             assert isinstance(item, dict)
+             if item["type"] == "text":
+                 aug_content += item["text"]
+             elif item["type"] == "image_url":
+                 img_url = item["image_url"]["url"]
+                 img_caption = ""
+
+                 if self._custom_caption_func:
+                     img_caption = self._custom_caption_func(img_url, get_pil_image(img_url), self._lmm_client)
+                 elif self._lmm_client:
+                     img_data = get_image_data(img_url)
+                     img_caption = self._get_image_caption(img_data)
+                 else:
+                     img_caption = ""
+
+                 aug_content += f"<img {img_url}> in case you cannot see, the caption of this image is: {img_caption}\n"
+             else:
+                 print(f"Warning: the input type should be either `text` or `image_url`. Skipping {item['type']} here.")
+
+         return aug_content
+
+     def _get_image_caption(self, img_data: str) -> str:
+         """
+         Args:
+             img_data (str): base64 encoded image data.
+         Returns:
+             str: caption for the given image.
+         """
+         response = self._lmm_client.create(
+             context=None,
+             messages=[
+                 {
+                     "role": "user",
+                     "content": [
+                         {"type": "text", "text": self._description_prompt},
+                         {
+                             "type": "image_url",
+                             "image_url": {
+                                 "url": convert_base64_to_data_uri(img_data),
+                             },
+                         },
+                     ],
+                 }
+             ],
+         )
+         description = response.choices[0].message.content
+         return content_str(description)
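
For orientation, a minimal usage sketch follows; it is not part of the packaged file. It shows how this capability is typically attached to a text-only agent via `add_to_agent`. The model names, API-key placeholders, and the `size_caption` helper are assumptions for illustration only:

import autogen
from autogen.agentchat.contrib.capabilities.vision_capability import VisionCapability

# Hypothetical LMM config for the captioning client (model and key are placeholders).
lmm_config = {"config_list": [{"model": "gpt-4-vision-preview", "api_key": "sk-..."}]}

# A text-only agent that cannot natively interpret images.
agent = autogen.ConversableAgent(
    name="assistant",
    llm_config={"config_list": [{"model": "gpt-3.5-turbo", "api_key": "sk-..."}]},
)

# add_to_agent() appends a note to the agent's system message and registers
# process_last_received_message as a hook, so <img ...> tags in incoming
# messages are replaced with text captions before the agent replies.
VisionCapability(lmm_config=lmm_config).add_to_agent(agent)

# Alternatively, skip the LMM client entirely by supplying the three-argument
# custom caption function described in the __init__ docstring above.
def size_caption(img_url, img_pil, lmm_client) -> str:
    width, height = img_pil.size  # img_pil is a PIL image; lmm_client may be None
    return f"An image of size {width}x{height} from {img_url}."

offline_vision = VisionCapability(lmm_config=None, custom_caption_func=size_caption)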