ag2 0.3.2b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ag2 might be problematic. Click here for more details.

Files changed (112) hide show
  1. ag2-0.3.2b2.dist-info/LICENSE +201 -0
  2. ag2-0.3.2b2.dist-info/METADATA +490 -0
  3. ag2-0.3.2b2.dist-info/NOTICE.md +19 -0
  4. ag2-0.3.2b2.dist-info/RECORD +112 -0
  5. ag2-0.3.2b2.dist-info/WHEEL +5 -0
  6. ag2-0.3.2b2.dist-info/top_level.txt +1 -0
  7. autogen/__init__.py +17 -0
  8. autogen/_pydantic.py +116 -0
  9. autogen/agentchat/__init__.py +26 -0
  10. autogen/agentchat/agent.py +142 -0
  11. autogen/agentchat/assistant_agent.py +85 -0
  12. autogen/agentchat/chat.py +306 -0
  13. autogen/agentchat/contrib/__init__.py +0 -0
  14. autogen/agentchat/contrib/agent_builder.py +785 -0
  15. autogen/agentchat/contrib/agent_optimizer.py +450 -0
  16. autogen/agentchat/contrib/capabilities/__init__.py +0 -0
  17. autogen/agentchat/contrib/capabilities/agent_capability.py +21 -0
  18. autogen/agentchat/contrib/capabilities/generate_images.py +297 -0
  19. autogen/agentchat/contrib/capabilities/teachability.py +406 -0
  20. autogen/agentchat/contrib/capabilities/text_compressors.py +72 -0
  21. autogen/agentchat/contrib/capabilities/transform_messages.py +92 -0
  22. autogen/agentchat/contrib/capabilities/transforms.py +565 -0
  23. autogen/agentchat/contrib/capabilities/transforms_util.py +120 -0
  24. autogen/agentchat/contrib/capabilities/vision_capability.py +217 -0
  25. autogen/agentchat/contrib/gpt_assistant_agent.py +545 -0
  26. autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
  27. autogen/agentchat/contrib/graph_rag/document.py +24 -0
  28. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +76 -0
  29. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +50 -0
  30. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +56 -0
  31. autogen/agentchat/contrib/img_utils.py +390 -0
  32. autogen/agentchat/contrib/llamaindex_conversable_agent.py +114 -0
  33. autogen/agentchat/contrib/llava_agent.py +176 -0
  34. autogen/agentchat/contrib/math_user_proxy_agent.py +471 -0
  35. autogen/agentchat/contrib/multimodal_conversable_agent.py +128 -0
  36. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
  37. autogen/agentchat/contrib/retrieve_assistant_agent.py +56 -0
  38. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +701 -0
  39. autogen/agentchat/contrib/society_of_mind_agent.py +203 -0
  40. autogen/agentchat/contrib/text_analyzer_agent.py +76 -0
  41. autogen/agentchat/contrib/vectordb/__init__.py +0 -0
  42. autogen/agentchat/contrib/vectordb/base.py +243 -0
  43. autogen/agentchat/contrib/vectordb/chromadb.py +326 -0
  44. autogen/agentchat/contrib/vectordb/mongodb.py +559 -0
  45. autogen/agentchat/contrib/vectordb/pgvectordb.py +958 -0
  46. autogen/agentchat/contrib/vectordb/qdrant.py +334 -0
  47. autogen/agentchat/contrib/vectordb/utils.py +126 -0
  48. autogen/agentchat/contrib/web_surfer.py +305 -0
  49. autogen/agentchat/conversable_agent.py +2904 -0
  50. autogen/agentchat/groupchat.py +1666 -0
  51. autogen/agentchat/user_proxy_agent.py +109 -0
  52. autogen/agentchat/utils.py +207 -0
  53. autogen/browser_utils.py +291 -0
  54. autogen/cache/__init__.py +10 -0
  55. autogen/cache/abstract_cache_base.py +78 -0
  56. autogen/cache/cache.py +182 -0
  57. autogen/cache/cache_factory.py +85 -0
  58. autogen/cache/cosmos_db_cache.py +150 -0
  59. autogen/cache/disk_cache.py +109 -0
  60. autogen/cache/in_memory_cache.py +61 -0
  61. autogen/cache/redis_cache.py +128 -0
  62. autogen/code_utils.py +745 -0
  63. autogen/coding/__init__.py +22 -0
  64. autogen/coding/base.py +113 -0
  65. autogen/coding/docker_commandline_code_executor.py +262 -0
  66. autogen/coding/factory.py +45 -0
  67. autogen/coding/func_with_reqs.py +203 -0
  68. autogen/coding/jupyter/__init__.py +22 -0
  69. autogen/coding/jupyter/base.py +32 -0
  70. autogen/coding/jupyter/docker_jupyter_server.py +164 -0
  71. autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
  72. autogen/coding/jupyter/jupyter_client.py +224 -0
  73. autogen/coding/jupyter/jupyter_code_executor.py +161 -0
  74. autogen/coding/jupyter/local_jupyter_server.py +168 -0
  75. autogen/coding/local_commandline_code_executor.py +410 -0
  76. autogen/coding/markdown_code_extractor.py +44 -0
  77. autogen/coding/utils.py +57 -0
  78. autogen/exception_utils.py +46 -0
  79. autogen/extensions/__init__.py +0 -0
  80. autogen/formatting_utils.py +76 -0
  81. autogen/function_utils.py +362 -0
  82. autogen/graph_utils.py +148 -0
  83. autogen/io/__init__.py +15 -0
  84. autogen/io/base.py +105 -0
  85. autogen/io/console.py +43 -0
  86. autogen/io/websockets.py +213 -0
  87. autogen/logger/__init__.py +11 -0
  88. autogen/logger/base_logger.py +140 -0
  89. autogen/logger/file_logger.py +287 -0
  90. autogen/logger/logger_factory.py +29 -0
  91. autogen/logger/logger_utils.py +42 -0
  92. autogen/logger/sqlite_logger.py +459 -0
  93. autogen/math_utils.py +356 -0
  94. autogen/oai/__init__.py +33 -0
  95. autogen/oai/anthropic.py +428 -0
  96. autogen/oai/bedrock.py +600 -0
  97. autogen/oai/cerebras.py +264 -0
  98. autogen/oai/client.py +1148 -0
  99. autogen/oai/client_utils.py +167 -0
  100. autogen/oai/cohere.py +453 -0
  101. autogen/oai/completion.py +1216 -0
  102. autogen/oai/gemini.py +469 -0
  103. autogen/oai/groq.py +281 -0
  104. autogen/oai/mistral.py +279 -0
  105. autogen/oai/ollama.py +576 -0
  106. autogen/oai/openai_utils.py +810 -0
  107. autogen/oai/together.py +343 -0
  108. autogen/retrieve_utils.py +487 -0
  109. autogen/runtime_logging.py +163 -0
  110. autogen/token_count_utils.py +257 -0
  111. autogen/types.py +20 -0
  112. autogen/version.py +7 -0
@@ -0,0 +1,50 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import List, Optional, Protocol
3
+
4
+ from .document import Document
5
+
6
+
7
@dataclass
class GraphStoreQueryResult:
    """
    A wrapper of graph store query results.

    Attributes:
        answer: human readable answer to question/query.
        results: intermediate results to question/query, e.g. node entities.
    """

    answer: Optional[str] = None
    results: list = field(default_factory=list)
18
+
19
+
20
class GraphQueryEngine(Protocol):
    """An abstract base class that represents a graph query engine on top of an underlying graph database.

    This interface defines the basic methods for graph rag.
    """

    # NOTE: `Optional[List[Document]]` instead of `List[Document] | None` — the PEP 604
    # `|` syntax on typing generics requires Python >= 3.10 and would raise at class
    # creation on older interpreters; it also matches the Optional[...] style used
    # elsewhere in this module.
    def init_db(self, input_doc: Optional[List[Document]] = None):
        """
        This method initializes graph database with the input documents or records.
        Usually, it takes the following steps,
        1. connecting to a graph database.
        2. extract graph nodes, edges based on input data, graph schema and etc.
        3. build indexes etc.

        Args:
            input_doc: a list of input documents that are used to build the graph in database.

        """
        pass

    def add_records(self, new_records: List) -> bool:
        """
        Add new records to the underlying database and add to the graph if required.

        Returns:
            bool: True if the records were added successfully.
        """
        pass

    def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult:
        """
        This method transforms a string format question into a database query and returns the result.

        Args:
            question: a natural-language question.
            n_results: number of intermediate results to return.

        Returns:
            GraphStoreQueryResult: the answer plus intermediate results.
        """
        pass
@@ -0,0 +1,56 @@
1
+ from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
2
+ from autogen.agentchat.conversable_agent import ConversableAgent
3
+
4
+ from .graph_query_engine import GraphQueryEngine
5
+
6
+
7
class GraphRagCapability(AgentCapability):
    """
    A graph rag capability uses a graph query engine to give a conversable agent the graph rag ability.

    An agent class with graph rag capability could
    1. create a graph in the underlying database with input documents.
    2. retrieve relevant information based on messages received by the agent.
    3. generate answers from retrieved information and send messages back.

    For example,
        graph_query_engine = GraphQueryEngine(...)
        graph_query_engine.init_db([Document(doc1), Document(doc2), ...])

        graph_rag_agent = ConversableAgent(
            name="graph_rag_agent",
            max_consecutive_auto_reply=3,
            ...
        )
        graph_rag_capability = GraphRagCapability(graph_query_engine)
        graph_rag_capability.add_to_agent(graph_rag_agent)

        user_proxy = UserProxyAgent(
            name="user_proxy",
            code_execution_config=False,
            is_termination_msg=lambda msg: "TERMINATE" in msg["content"],
            human_input_mode="ALWAYS",
        )
        user_proxy.initiate_chat(graph_rag_agent, message="Name a few actors who've played in 'The Matrix'")

        # ChatResult(
        #     chat_id=None,
        #     chat_history=[
        #         {'content': 'Name a few actors who've played in \'The Matrix\'', 'role': 'graph_rag_agent'},
        #         {'content': 'A few actors who have played in The Matrix are:
        #         - Keanu Reeves
        #         - Laurence Fishburne
        #         - Carrie-Anne Moss
        #         - Hugo Weaving',
        #         'role': 'user_proxy'},
        #     ...)

    """

    def __init__(self, query_engine: GraphQueryEngine):
        """
        Initialize graph rag capability with a graph query engine.

        Args:
            query_engine: the graph query engine this capability will use to
                retrieve information for the agent.
        """
        ...

    def add_to_agent(self, agent: ConversableAgent):
        """Attach this graph rag capability to the given conversable agent."""
        ...
@@ -0,0 +1,390 @@
1
+ # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ import base64
8
+ import copy
9
+ import os
10
+ import re
11
+ from io import BytesIO
12
+ from math import ceil
13
+ from typing import Dict, List, Tuple, Union
14
+
15
+ import requests
16
+ from PIL import Image
17
+
18
+ from autogen.agentchat import utils
19
+
20
# Parameters for token counting for images for different models.
# Used by `num_tokens_from_gpt_image`:
#   max_edge:          longest image edge is first scaled down to at most this many pixels
#   min_edge:          shortest edge is then scaled down to at most this many pixels
#   tile_size:         the scaled image is covered by square tiles of this edge length
#   base_token_count:  fixed per-image token cost (also the cost in low-quality mode)
#   token_multiplier:  additional token cost per tile
MODEL_PARAMS = {
    "gpt-4-vision": {
        "max_edge": 2048,
        "min_edge": 768,
        "tile_size": 512,
        "base_token_count": 85,
        "token_multiplier": 170,
    },
    "gpt-4o-mini": {
        "max_edge": 2048,
        "min_edge": 768,
        "tile_size": 512,
        "base_token_count": 2833,
        "token_multiplier": 5667,
    },
    "gpt-4o": {"max_edge": 2048, "min_edge": 768, "tile_size": 512, "base_token_count": 85, "token_multiplier": 170},
}
38
+
39
+
40
def get_pil_image(image_file: Union[str, Image.Image]) -> Image.Image:
    """
    Loads an image from a file and returns a PIL Image object.

    Parameters:
        image_file (str, or Image): The filename, URL, URI, or base64 string of the image file.

    Returns:
        Image.Image: The PIL Image object (converted to RGB).
    """
    if isinstance(image_file, Image.Image):
        # Already a PIL Image object
        return image_file

    # Remove surrounding quotes if present (e.g. a quoted path pasted into a prompt).
    if image_file.startswith('"') and image_file.endswith('"'):
        image_file = image_file[1:-1]
    if image_file.startswith("'") and image_file.endswith("'"):
        image_file = image_file[1:-1]

    if image_file.startswith("http://") or image_file.startswith("https://"):
        # A URL file. Use a timeout so an unresponsive server cannot hang the caller
        # indefinitely, and fail fast on HTTP errors instead of handing an error page
        # to the image decoder.
        response = requests.get(image_file, timeout=60)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))
    elif re.match(r"data:image/(?:png|jpeg);base64,", image_file):
        # A URI. Remove the prefix and decode the base64 string.
        base64_data = re.sub(r"data:image/(?:png|jpeg);base64,", "", image_file)
        image = _to_pil(base64_data)
    elif os.path.exists(image_file):
        # A local file
        image = Image.open(image_file)
    else:
        # Assume a bare base64 encoded string.
        image = _to_pil(image_file)

    return image.convert("RGB")
77
+
78
+
79
def get_image_data(image_file: Union[str, Image.Image], use_b64=True) -> Union[bytes, str]:
    """
    Loads an image and returns its data either as raw bytes or in base64-encoded format.

    This function first loads an image from the specified file, URL, or base64 string using
    the `get_pil_image` function. It then saves this image in memory in PNG format and
    retrieves its binary content. Depending on the `use_b64` flag, this binary content is
    either returned directly or as a base64-encoded string.

    Parameters:
        image_file (str, or Image): The path to the image file, a URL to an image, or a base64-encoded
                                    string of the image.
        use_b64 (bool): If True, the function returns a base64-encoded string of the image data.
                        If False, it returns the raw byte data of the image. Defaults to True.

    Returns:
        Union[bytes, str]: The image data in raw bytes if `use_b64` is False, or a
                           base64-encoded string if `use_b64` is True.
                           (The original annotation said `bytes`, but the default
                           `use_b64=True` path returns a decoded `str`.)
    """
    image = get_pil_image(image_file)

    # Re-encode as PNG in memory so the output format is uniform regardless of input.
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    content = buffered.getvalue()

    if use_b64:
        # b64encode yields bytes; decode to str for easy embedding in URIs/JSON.
        return base64.b64encode(content).decode("utf-8")
    else:
        return content
108
+
109
+
110
def llava_formatter(prompt: str, order_image_tokens: bool = False) -> Tuple[str, List[str]]:
    """
    Formats the input prompt by replacing image tags and returns the new prompt along with image data.

    Parameters:
        - prompt (str): The input string that may contain image tags like <img ...>.
        - order_image_tokens (bool, optional): Whether to order the image tokens with numbers.
            It will be useful for GPT-4V. Defaults to False.

    Returns:
        - Tuple[str, List[str]]: A tuple containing the formatted string and a list of images (loaded in b64 format).
    """

    # Initialize variables
    new_prompt = prompt
    images = []
    image_count = 0

    # Regular expression pattern for matching <img ...> tags
    img_tag_pattern = re.compile(r"<img ([^>]+)>")

    # Find all image tags
    for match in img_tag_pattern.finditer(prompt):
        image_location = match.group(1)

        try:
            img_data = get_image_data(image_location)
        except Exception as e:
            # Unloadable image: drop the tag from the prompt and keep going.
            print(f"Warning! Unable to load image from {image_location}, because of {e}")
            new_prompt = new_prompt.replace(match.group(0), "", 1)
            continue

        images.append(img_data)

        # Replace the tag with a placeholder token (numbered when requested),
        # then advance the counter used for numbering.
        new_token = f"<image {image_count}>" if order_image_tokens else "<image>"
        new_prompt = new_prompt.replace(match.group(0), new_token, 1)
        image_count += 1

    return new_prompt, images
154
+
155
+
156
def pil_to_data_uri(image: Image.Image) -> str:
    """
    Converts a PIL Image object to a data URI.

    Parameters:
        image (Image.Image): The PIL Image object.

    Returns:
        str: The data URI string.
    """
    # Serialize to PNG in memory, then wrap the base64 payload in a data URI.
    out = BytesIO()
    image.save(out, format="PNG")
    encoded = base64.b64encode(out.getvalue()).decode("utf-8")
    return convert_base64_to_data_uri(encoded)
170
+
171
+
172
def convert_base64_to_data_uri(base64_image):
    """Wrap a base64-encoded image string in a data URI with a sniffed MIME type."""

    def _sniff_mime_type(b64: str) -> str:
        # Decode the payload and inspect the leading magic bytes.
        raw = base64.b64decode(b64)
        known_signatures = [
            (b"\xff\xd8\xff", "image/jpeg"),
            (b"\x89PNG\r\n\x1a\n", "image/png"),
            (b"GIF87a", "image/gif"),
            (b"GIF89a", "image/gif"),
        ]
        for magic, mime in known_signatures:
            if raw.startswith(magic):
                return mime
        if raw.startswith(b"RIFF") and raw[8:12] == b"WEBP":
            return "image/webp"
        return "image/jpeg"  # use jpeg for unknown formats, best guess.

    mime_type = _sniff_mime_type(base64_image)
    return f"data:{mime_type};base64,{base64_image}"
190
+
191
+
192
def gpt4v_formatter(prompt: str, img_format: str = "uri") -> List[Union[str, dict]]:
    """
    Formats the input prompt by replacing image tags and returns a list of text and images.

    Args:
        - prompt (str): The input string that may contain image tags like <img ...>.
        - img_format (str): what image format should be used. One of "uri", "url", "pil".

    Returns:
        - List[Union[str, dict]]: A list of alternating text and image dictionary items.
    """
    assert img_format in ["uri", "url", "pil"]

    output = []
    last_index = 0

    # Find all image tags
    for parsed_tag in utils.parse_tags_from_content("img", prompt):
        image_location = parsed_tag["attr"]["src"]
        try:
            if img_format == "pil":
                img_data = get_pil_image(image_location)
            elif img_format == "uri":
                img_data = get_image_data(image_location)
                img_data = convert_base64_to_data_uri(img_data)
            elif img_format == "url":
                # Pass the raw location through untouched.
                img_data = image_location
            else:
                raise ValueError(f"Unknown image format {img_format}")
        except Exception as e:
            # Warning and skip this token; keep formatting the rest of the prompt.
            print(f"Warning! Unable to load image from {image_location}, because {e}")
            continue

        # Add text before this image tag to output list
        output.append({"type": "text", "text": prompt[last_index : parsed_tag["match"].start()]})

        # Add image data to output list
        output.append({"type": "image_url", "image_url": {"url": img_data}})

        last_index = parsed_tag["match"].end()

    # Add remaining text to output list
    output.append({"type": "text", "text": prompt[last_index:]})
    return output
239
+
240
+
241
def extract_img_paths(paragraph: str) -> list:
    """
    Extract image paths (URLs or local paths) from a text paragraph.

    Parameters:
        paragraph (str): The input text paragraph.

    Returns:
        list: A list of extracted image paths.
    """
    # Match either http(s) URLs or bare paths that end in a known image extension.
    pattern = re.compile(
        r"\b(?:http[s]?://\S+\.(?:jpg|jpeg|png|gif|bmp)|\S+\.(?:jpg|jpeg|png|gif|bmp))\b", re.IGNORECASE
    )
    return pattern.findall(paragraph)
259
+
260
+
261
def _to_pil(data: str) -> Image.Image:
    """
    Converts a base64 encoded image data string to a PIL Image object.

    The base64 string is decoded to raw bytes, wrapped in an in-memory buffer,
    and handed to PIL for decoding.

    Parameters:
        data (str): The encoded image data string.

    Returns:
        Image.Image: The PIL Image object created from the input data.
    """
    raw_bytes = base64.b64decode(data)
    return Image.open(BytesIO(raw_bytes))
275
+
276
+
277
def message_formatter_pil_to_b64(messages: List[Dict]) -> List[Dict]:
    """
    Converts the PIL image URLs in the messages to base64 encoded data URIs.

    For each message dictionary whose 'content' value is a list (the GPT
    multimodal message format), every item carrying an 'image_url' key has
    its URL converted from a PIL image to a base64 encoded data URI. Messages
    in any other shape are passed through unchanged. The input list is never
    mutated: converted messages are deep copies.

    Parameters:
        messages (List[Dict]): A list of message dictionaries. Each dictionary
                               may contain a 'content' key with a list of items,
                               some of which might be image URLs.

    Returns:
        List[Dict]: A new list of message dictionaries with PIL image URLs in the
                    'image_url' key converted to base64 encoded data URIs.
    """
    converted = []
    for original in messages:
        content = original.get("content") if isinstance(original, dict) else None
        if not isinstance(content, list):
            # Legacy / plain-string messages pass through untouched.
            converted.append(original)
            continue

        # Deep-copy so the caller's message structure is never mutated in place.
        clone = copy.deepcopy(original)
        for part in clone["content"]:
            if isinstance(part, dict) and "image_url" in part:
                part["image_url"]["url"] = pil_to_data_uri(part["image_url"]["url"])
        converted.append(clone)

    return converted
327
+
328
+
329
def num_tokens_from_gpt_image(
    image_data: Union[str, Image.Image], model: str = "gpt-4-vision", low_quality: bool = False
) -> int:
    """
    Calculate the number of tokens required to process an image based on its dimensions
    after scaling for different GPT models. Supports "gpt-4-vision", "gpt-4o", and "gpt-4o-mini".
    This function scales the image so that its longest edge is at most 2048 pixels and its shortest
    edge is at most 768 pixels (for "gpt-4-vision"). It then calculates the number of 512x512 tiles
    needed to cover the scaled image and computes the total tokens based on the number of these tiles.

    Reference: https://openai.com/api/pricing/

    Args:
        image_data : Union[str, Image.Image]: The image data which can either be a base64
            encoded string, a URL, a file path, or a PIL Image object.
        model: str: The model being used for image processing. Can be "gpt-4-vision", "gpt-4o", or "gpt-4o-mini".
        low_quality: bool: If True, skip the tiling computation and return only the
            model's fixed base token count (low-detail image pricing). Defaults to False.

    Returns:
        int: The total number of tokens required for processing the image.

    Raises:
        ValueError: If `model` does not match any supported model family.

    Examples:
    --------
    >>> from PIL import Image
    >>> img = Image.new('RGB', (2500, 2500), color = 'red')
    >>> num_tokens_from_gpt_image(img, model="gpt-4-vision")
    765
    """

    image = get_pil_image(image_data)  # PIL Image
    width, height = image.size

    # Determine model parameters by substring match, so variant names like
    # "gpt-4-turbo-2024-..." map onto the right parameter set.
    if "gpt-4-vision" in model or "gpt-4-turbo" in model or "gpt-4v" in model or "gpt-4-v" in model:
        params = MODEL_PARAMS["gpt-4-vision"]
    elif "gpt-4o-mini" in model:
        # Checked before "gpt-4o": "gpt-4o-mini" also contains "gpt-4o" as a substring.
        params = MODEL_PARAMS["gpt-4o-mini"]
    elif "gpt-4o" in model:
        params = MODEL_PARAMS["gpt-4o"]
    else:
        raise ValueError(
            f"Model {model} is not supported. Choose 'gpt-4-vision', 'gpt-4-turbo', 'gpt-4v', 'gpt-4-v', 'gpt-4o', or 'gpt-4o-mini'."
        )

    # Low-detail mode: flat cost, independent of image size.
    if low_quality:
        return params["base_token_count"]

    # 1. Constrain the longest edge
    if max(width, height) > params["max_edge"]:
        scale_factor = params["max_edge"] / max(width, height)
        width, height = int(width * scale_factor), int(height * scale_factor)

    # 2. Further constrain the shortest edge
    if min(width, height) > params["min_edge"]:
        scale_factor = params["min_edge"] / min(width, height)
        width, height = int(width * scale_factor), int(height * scale_factor)

    # 3. Count how many tiles are needed to cover the image
    tiles_width = ceil(width / params["tile_size"])
    tiles_height = ceil(height / params["tile_size"])
    total_tokens = params["base_token_count"] + params["token_multiplier"] * (tiles_width * tiles_height)

    return total_tokens
@@ -0,0 +1,114 @@
1
+ # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ from typing import Dict, List, Optional, Tuple, Union
8
+
9
+ from autogen import OpenAIWrapper
10
+ from autogen.agentchat import Agent, ConversableAgent
11
+ from autogen.agentchat.contrib.vectordb.utils import get_logger
12
+
13
+ logger = get_logger(__name__)
14
+
15
+ try:
16
+ from llama_index.core.agent.runner.base import AgentRunner
17
+ from llama_index.core.base.llms.types import ChatMessage
18
+ from llama_index.core.chat_engine.types import AgentChatResponse
19
+ except ImportError as e:
20
+ logger.fatal("Failed to import llama-index. Try running 'pip install llama-index'")
21
+ raise e
22
+
23
+
24
class LLamaIndexConversableAgent(ConversableAgent):
    """A ConversableAgent whose replies are produced by a llama-index AgentRunner.

    The standard `generate_oai_reply` / `a_generate_oai_reply` reply functions are
    replaced with implementations that forward the latest message (plus prior chat
    history) to the wrapped llama-index agent and return its response text.
    """

    def __init__(
        self,
        name: str,
        llama_index_agent: AgentRunner,
        description: Optional[str] = None,
        **kwargs,
    ):
        """
        Args:
            name (str): agent name.
            llama_index_agent (AgentRunner): llama index agent.
                Please override this attribute if you want to reprogram the agent.
            description (str): a short description of the agent. This description is used by other agents
                (e.g. the GroupChatManager) to decide when to call upon this agent.
            **kwargs (dict): Please refer to other kwargs in
                [ConversableAgent](../conversable_agent#__init__).

        Raises:
            ValueError: if `llama_index_agent` is None, or `description` is missing/blank.
        """

        if llama_index_agent is None:
            raise ValueError("llama_index_agent must be provided")

        # Require a meaningful description: other agents rely on it for speaker selection.
        if description is None or description.isspace():
            raise ValueError("description must be provided")

        super().__init__(
            name,
            description=description,
            **kwargs,
        )

        self._llama_index_agent = llama_index_agent

        # Override the `generate_oai_reply` (sync and async variants) so replies
        # come from the llama-index agent instead of autogen's OpenAI client.
        self.replace_reply_func(ConversableAgent.generate_oai_reply, LLamaIndexConversableAgent._generate_oai_reply)

        self.replace_reply_func(ConversableAgent.a_generate_oai_reply, LLamaIndexConversableAgent._a_generate_oai_reply)

    def _generate_oai_reply(
        self,
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[OpenAIWrapper] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """Generate a reply by delegating to the llama-index agent (sync path).

        Returns:
            Tuple[bool, Union[str, Dict, None]]: (True, reply text); True marks the
            reply as final so no further reply functions run.
        """
        user_message, history = self._extract_message_and_history(messages=messages, sender=sender)

        chatResponse: AgentChatResponse = self._llama_index_agent.chat(message=user_message, chat_history=history)

        extracted_response = chatResponse.response

        return (True, extracted_response)

    async def _a_generate_oai_reply(
        self,
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[OpenAIWrapper] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """Generate a reply by delegating to the llama-index agent (async path)."""
        user_message, history = self._extract_message_and_history(messages=messages, sender=sender)

        chatResponse: AgentChatResponse = await self._llama_index_agent.achat(
            message=user_message, chat_history=history
        )

        extracted_response = chatResponse.response

        return (True, extracted_response)

    def _extract_message_and_history(
        self, messages: Optional[List[Dict]] = None, sender: Optional[Agent] = None
    ) -> Tuple[str, List[ChatMessage]]:
        """Extract the message and history from the messages.

        The last message's content becomes the outgoing message; all earlier
        messages are converted to llama-index ChatMessage history entries.

        Returns:
            Tuple[str, List[ChatMessage]]: (latest message content, prior history).
        """
        if not messages:
            # Fall back to the stored conversation with this sender.
            messages = self._oai_messages[sender]

        if not messages:
            return "", []

        message = messages[-1].get("content", "")

        history = messages[:-1]
        history_messages: List[ChatMessage] = []
        for history_message in history:
            content = history_message.get("content", "")
            role = history_message.get("role", "user")
            if role:
                # Only "user"/"assistant" roles are forwarded; other roles
                # (e.g. "system", tool messages) are dropped from the history.
                if role == "user" or role == "assistant":
                    history_messages.append(ChatMessage(content=content, role=role, additional_kwargs={}))
        return message, history_messages