ag2 0.9.3__py3-none-any.whl → 0.9.5__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release: this version of ag2 might be problematic.

Files changed (32)
  1. {ag2-0.9.3.dist-info → ag2-0.9.5.dist-info}/METADATA +1 -1
  2. {ag2-0.9.3.dist-info → ag2-0.9.5.dist-info}/RECORD +32 -25
  3. autogen/agentchat/contrib/agent_optimizer.py +6 -3
  4. autogen/agentchat/conversable_agent.py +51 -5
  5. autogen/agentchat/group/group_utils.py +16 -7
  6. autogen/agentchat/group/guardrails.py +171 -0
  7. autogen/agentchat/group/targets/transition_target.py +10 -0
  8. autogen/agentchat/groupchat.py +93 -6
  9. autogen/agentchat/realtime/experimental/realtime_swarm.py +2 -0
  10. autogen/agents/experimental/websurfer/websurfer.py +9 -1
  11. autogen/code_utils.py +8 -6
  12. autogen/events/agent_events.py +6 -0
  13. autogen/events/helpers.py +8 -0
  14. autogen/mcp/helpers.py +45 -0
  15. autogen/mcp/mcp_proxy/mcp_proxy.py +2 -3
  16. autogen/messages/agent_messages.py +1 -1
  17. autogen/oai/client.py +44 -1
  18. autogen/oai/gemini.py +39 -24
  19. autogen/oai/gemini_types.py +1 -1
  20. autogen/oai/openai_responses.py +426 -0
  21. autogen/tools/experimental/__init__.py +4 -0
  22. autogen/tools/experimental/browser_use/browser_use.py +4 -11
  23. autogen/tools/experimental/firecrawl/__init__.py +7 -0
  24. autogen/tools/experimental/firecrawl/firecrawl_tool.py +853 -0
  25. autogen/tools/experimental/searxng/__init__.py +7 -0
  26. autogen/tools/experimental/searxng/searxng_search.py +141 -0
  27. autogen/version.py +1 -1
  28. templates/client_template/main.jinja2 +5 -2
  29. templates/main.jinja2 +1 -1
  30. {ag2-0.9.3.dist-info → ag2-0.9.5.dist-info}/WHEEL +0 -0
  31. {ag2-0.9.3.dist-info → ag2-0.9.5.dist-info}/licenses/LICENSE +0 -0
  32. {ag2-0.9.3.dist-info → ag2-0.9.5.dist-info}/licenses/NOTICE.md +0 -0
--- /dev/null
+++ b/autogen/oai/openai_responses.py
@@ -0,0 +1,426 @@
+ # Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ import copy
+ import warnings
+ from typing import TYPE_CHECKING, Any, Tuple, Union
+
+ from pydantic import BaseModel
+
+ from autogen.code_utils import content_str
+ from autogen.import_utils import optional_import_block, require_optional_import
+
+ if TYPE_CHECKING:
+     from autogen.oai.client import ModelClient, OpenAI, OpenAILLMConfigEntry
+ else:
+     # Import at runtime to avoid circular import
+     OpenAILLMConfigEntry = None
+     ModelClient = None
+     OpenAI = None
+
+ with optional_import_block() as openai_result:
+     from openai.types.responses.response import Response
+     from openai.types.responses.response_output_item import ImageGenerationCall
+
+ # Image Costs
+ # Pricing per image (in USD)
+ PRICING = {
+     "gpt-image-1": {
+         "low": {"1024x1024": 0.011, "1024x1536": 0.016, "1536x1024": 0.016},
+         "medium": {"1024x1024": 0.042, "1024x1536": 0.063, "1536x1024": 0.063},
+         "high": {"1024x1024": 0.167, "1024x1536": 0.25, "1536x1024": 0.25},
+     },
+     "dall-e-3": {
+         "standard": {"1024x1024": 0.040, "1024x1792": 0.080, "1792x1024": 0.080},
+         "hd": {"1024x1024": 0.080, "1024x1792": 0.120, "1792x1024": 0.120},
+     },
+     "dall-e-2": {"standard": {"1024x1024": 0.020, "512x512": 0.018, "256x256": 0.016}},
+ }
+
+ # Valid sizes for each model
+ VALID_SIZES = {
+     "gpt-image-1": ["1024x1024", "1024x1536", "1536x1024"],
+     "dall-e-3": ["1024x1024", "1024x1792", "1792x1024"],
+     "dall-e-2": ["1024x1024", "512x512", "256x256"],
+ }
+
+
+ def calculate_openai_image_cost(
+     model: str = "gpt-image-1", size: str = "1024x1024", quality: str = "high"
+ ) -> Tuple[float, str]:
+     """
+     Calculate the cost for a single image generation.
+
+     Args:
+         model: Model name ("gpt-image-1", "dall-e-3" or "dall-e-2")
+         size: Image size (e.g., "1024x1024", "1024x1536")
+         quality: Quality setting:
+             - For gpt-image-1: "low", "medium", or "high"
+             - For dall-e-3: "standard" or "hd"
+             - For dall-e-2: "standard" only
+
+     Returns:
+         Tuple of (cost, error_message)
+     """
+     # Normalize inputs
+     model = model.lower()
+     quality = quality.lower()
+
+     # Validate model
+     if model not in PRICING:
+         return 0.0, f"Invalid model: {model}. Valid models: {list(PRICING.keys())}"
+
+     # Validate size
+     if size not in VALID_SIZES[model]:
+         return 0.0, f"Invalid size {size} for {model}. Valid sizes: {VALID_SIZES[model]}"
+
+     # Get the cost based on model type
+     try:
+         if model == "gpt-image-1" or model == "dall-e-3":
+             cost = PRICING[model][quality][size]
+         elif model == "dall-e-2":
+             cost = PRICING[model]["standard"][size]
+         else:
+             return 0.0, f"Model {model} not properly configured"
+
+         return cost, None
+
+     except KeyError:
+         return 0.0, f"Invalid quality '{quality}' for {model}"
+
+
+ def _get_base_class():
+     """Lazy import OpenAILLMConfigEntry to avoid circular imports."""
+     from autogen.oai.client import OpenAILLMConfigEntry
+
+     return OpenAILLMConfigEntry
+
+
+ # -----------------------------------------------------------------------------
+ # OpenAI Client that calls the /responses endpoint
+ # -----------------------------------------------------------------------------
+ @require_optional_import("openai", "openai")
+ class OpenAIResponsesClient:
+     """Minimal implementation targeting the experimental /responses endpoint.
+
+     We purposefully keep the surface small - *create*, *message_retrieval*,
+     *cost* and *get_usage* - enough for ConversableAgent to operate. Anything
+     that the new endpoint does natively (web_search, file_search, image
+     generation, function calling, etc.) is transparently passed through by the
+     OpenAI SDK so we don't replicate logic here.
+     """
+
+     def __init__(
+         self,
+         client: "OpenAI",
+         response_format: Union[BaseModel, dict[str, Any], None] = None,
+     ):
+         self._oai_client = client  # plain openai.OpenAI instance
+         self.response_format = response_format  # kept for parity but unused for now
+
+         # Initialize the image generation parameters
+         self.image_output_params = {
+             "quality": None,  # "high" or "low"
+             "background": None,  # "white" or "black" or "transparent"
+             "size": None,  # "1024x1024" or "1024x1792" or "1792x1024"
+             "output_format": "png",  # "png", "jpg" or "jpeg" or "webp"
+             "output_compression": None,  # 0-100 if output_format is "jpg" or "jpeg" or "webp"
+         }
+         self.previous_response_id = None
+
+         # Image costs are calculated manually (rather than off returned information)
+         self.image_costs = 0
+
+     # ------------------------------------------------------------------ helpers
+     # responses objects embed usage similarly to chat completions
+     @staticmethod
+     def _usage_dict(resp) -> dict:
+         usage_obj = getattr(resp, "usage", None) or {}
+
+         # Convert pydantic/BaseModel usage objects to dict for uniform access
+         if hasattr(usage_obj, "model_dump"):
+             usage = usage_obj.model_dump()
+         elif isinstance(usage_obj, dict):
+             usage = usage_obj
+         else:  # fallback - unknown structure
+             usage = {}
+
+         output_tokens_details = usage.get("output_tokens_details", {})
+
+         return {
+             "prompt_tokens": usage.get("input_tokens", 0),
+             "completion_tokens": usage.get("output_tokens", 0),
+             "total_tokens": usage.get("total_tokens", 0),
+             "cost": getattr(resp, "cost", 0),
+             "model": getattr(resp, "model", ""),
+             "reasoning_tokens": output_tokens_details.get("reasoning_tokens", 0),
+         }
+
+     def _add_image_cost(self, response: "Response") -> None:
+         """Add image cost to self._image_costs when an image is generated"""
+         for output in response.output:
+             if (
+                 isinstance(output, ImageGenerationCall)
+                 and hasattr(response.output[0], "model_extra")
+                 and response.output[0].model_extra
+             ):
+                 extra_fields = output.model_extra
+
+                 image_cost, image_error = calculate_openai_image_cost(
+                     model="gpt-image-1",
+                     size=extra_fields.get("size", "1024x1536"),
+                     quality=extra_fields.get("quality", "high"),
+                 )
+
+                 if not image_error and image_cost:
+                     self.image_costs += image_cost
+
+     def _get_delta_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+         """Get the delta messages from the messages."""
+         delta_messages = []
+         for m in messages[::-1]:
+             contents = m.get("content")
+             is_last_completed_response = False
+             if isinstance(contents, list):
+                 for c in contents:
+                     if "status" in c and c.get("status") == "completed":
+                         is_last_completed_response = True
+                         break
+             elif isinstance(contents, str):
+                 is_last_completed_response = "status" in m and m.get("status") == "completed"
+
+             if is_last_completed_response:
+                 break
+             delta_messages.append(m)
+         return delta_messages[::-1]
+
+     def create(self, params: dict[str, Any]) -> "Response":
+         """Invoke `client.responses.create() or .parse()`.
+
+         If the caller provided a classic *messages* array we convert it to the
+         *input* format expected by the Responses API.
+         """
+         params = params.copy()
+
+         image_generation_tool_params = {"type": "image_generation"}
+         web_search_tool_params = {"type": "web_search_preview"}
+
+         if self.previous_response_id is not None and "previous_response_id" not in params:
+             params["previous_response_id"] = self.previous_response_id
+
+         # Back-compat: transform messages → input if needed ------------------
+         if "messages" in params and "input" not in params:
+             msgs = self._get_delta_messages(params.pop("messages"))
+             input_items = []
+             for m in msgs[::-1]:  # reverse the list to get the last item first
+                 role = m.get("role", "user")
+                 # First, we need to convert the content to the Responses API format
+                 content = m.get("content")
+                 blocks = []
+                 if role != "tool":
+                     if isinstance(content, list):
+                         for c in content:
+                             if c.get("type") in ["input_text", "text"]:
+                                 blocks.append({"type": "input_text", "text": c.get("text")})
+                             elif c.get("type") == "input_image":
+                                 blocks.append({"type": "input_image", "image_url": c.get("image_url")})
+                             elif c.get("type") == "image_params":
+                                 for k, v in c.get("image_params", {}).items():
+                                     if k in self.image_output_params:
+                                         image_generation_tool_params[k] = v
+                             else:
+                                 raise ValueError(f"Invalid content type: {c.get('type')}")
+                     else:
+                         blocks.append({"type": "input_text", "text": content})
+                     input_items.append({"role": role, "content": blocks})
+
+                 else:
+                     if input_items:
+                         break
+                     # tool call response is the last item in the list
+                     content = content_str(m.get("content"))
+                     input_items.append({
+                         "type": "function_call_output",
+                         "call_id": m.get("tool_call_id", None),
+                         "output": content,
+                     })
+                     break
+             params["input"] = input_items[::-1]
+
+         # Initialize tools list
+         tools_list = []
+         # Back-compat: add default tools
+         built_in_tools = params.pop("built_in_tools", [])
+         if built_in_tools:
+             if "image_generation" in built_in_tools:
+                 tools_list.append(image_generation_tool_params)
+             if "web_search" in built_in_tools:
+                 tools_list.append(web_search_tool_params)
+
+         if "tools" in params:
+             for tool in params["tools"]:
+                 tool_item = {"type": "function"}
+                 if "function" in tool:
+                     tool_item |= tool["function"]
+                 tools_list.append(tool_item)
+         params["tools"] = tools_list
+         params["tool_choice"] = "auto"
+
+         # Ensure we don't mix legacy params that Responses doesn't accept
+         if params.get("stream") and params.get("background"):
+             warnings.warn(
+                 "Streaming a background response may introduce latency.",
+                 UserWarning,
+             )
+
+         # ------------------------------------------------------------------
+         # Structured output handling - mimic OpenAIClient behaviour
+         # ------------------------------------------------------------------
+
+         if self.response_format is not None or "response_format" in params:
+
+             def _create_or_parse(**kwargs):
+                 # For structured output we must convert dict / pydantic model
+                 # into the JSON-schema body expected by the API.
+                 if "stream" in kwargs:
+                     kwargs.pop("stream")  # Responses API rejects stream with RF for now
+
+                 rf = kwargs.get("response_format", self.response_format)
+
+                 if isinstance(rf, dict):
+                     from autogen.oai.client import _ensure_strict_json_schema
+
+                     kwargs["text_format"] = {
+                         "type": "json_schema",
+                         "json_schema": {
+                             "schema": _ensure_strict_json_schema(rf, path=(), root=rf),
+                             "name": "response_format",
+                             "strict": True,
+                         },
+                     }
+                 else:
+                     # pydantic.BaseModel subclass
+                     from autogen.oai.client import type_to_response_format_param
+
+                     kwargs["text_format"] = type_to_response_format_param(rf)
+                 if "response_format" in kwargs:
+                     kwargs["text_format"] = kwargs.pop("response_format")
+
+                 try:
+                     return self._oai_client.responses.parse(**kwargs)
+                 except TypeError as e:
+                     # Older openai-python versions may not yet expose the
+                     # text_format parameter on the Responses endpoint.
+                     if "text_format" in str(e) and "unexpected" in str(e):
+                         warnings.warn(
+                             "Installed openai-python version doesn't support "
+                             "`response_format` for the Responses API. "
+                             "Falling back to raw text output.",
+                             UserWarning,
+                         )
+                         kwargs.pop("text_format", None)
+                     return self._oai_client.responses.create(**kwargs)
+
+             response = _create_or_parse(**params)
+             self.previous_response_id = response.id
+             return response
+
+         # No structured output
+         response = self._oai_client.responses.create(**params)
+         self.previous_response_id = response.id
+
+         # Accumulate image costs
+         self._add_image_cost(response)
+
+         return response
+
+     def message_retrieval(
+         self, response
+     ) -> Union[list[str], list["ModelClient.ModelClientResponseProtocol.Choice.Message"]]:
+         output = getattr(response, "output", [])
+         content = []  # list[dict[str, Union[str, dict[str, Any]]]]]
+         tool_calls = []
+         for item in output:
+             # Convert pydantic objects to plain dicts for uniform handling
+             if hasattr(item, "model_dump"):
+                 item = item.model_dump()
+
+             item_type = item.get("type")
+
+             # ------------------------------------------------------------------
+             # 1) Normal messages
+             # ------------------------------------------------------------------
+             if item_type == "message":
+                 new_item = copy.deepcopy(item)
+                 new_item["type"] = "text"
+                 new_item["role"] = "assistant"
+                 blocks = item.get("content", [])
+                 if len(blocks) == 1 and blocks[0].get("type") == "output_text":
+                     new_item["text"] = blocks[0]["text"]
+                     if "content" in new_item:
+                         del new_item["content"]
+                 content.append(new_item)
+                 continue
+
+             # ------------------------------------------------------------------
+             # 2) Custom function calls
+             # ------------------------------------------------------------------
+             if item_type == "function_call":
+                 tool_calls.append({
+                     "id": item.get("call_id", None),
+                     "function": {
+                         "name": item.get("name", None),
+                         "arguments": item.get("arguments"),
+                     },
+                     "type": "function_call",
+                 })
+                 continue
+
+             # ------------------------------------------------------------------
+             # 3) Built-in tool calls
+             # ------------------------------------------------------------------
+             if item_type and item_type.endswith("_call"):
+                 tool_name = item_type.replace("_call", "")
+                 tool_call_args = {
+                     "id": item.get("id"),
+                     "role": "tool_calls",
+                     "type": "tool_call",  # Responses API currently routes via function-like tools
+                     "name": tool_name,
+                 }
+                 if tool_name == "image_generation":
+                     for k in self.image_output_params:
+                         if k in item:
+                             tool_call_args[k] = item[k]
+                     encoded_base64_result = item.get("result", "")
+                     tool_call_args["content"] = encoded_base64_result
+                     # add image_url for image input back to oai response api.
+                     output_format = self.image_output_params["output_format"]
+                     tool_call_args["image_url"] = f"data:image/{output_format};base64,{encoded_base64_result}"
+                 elif tool_name == "web_search":
+                     pass
+                 else:
+                     raise ValueError(f"Invalid tool name: {tool_name}")
+                 content.append(tool_call_args)
+                 continue
+
+             # ------------------------------------------------------------------
+             # 4) Fallback - store raw dict so information isn't lost
+             # ------------------------------------------------------------------
+             content.append(item)
+
+         return [
+             {
+                 "role": "assistant",
+                 "id": response.id,
+                 "content": content if content else None,
+                 "tool_calls": tool_calls,
+             }
+         ]
+
+     def cost(self, response):
+         return self._usage_dict(response).get("cost", 0) + self.image_costs
+
+     @staticmethod
+     def get_usage(response):
+         return OpenAIResponsesClient._usage_dict(response)
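The new module pairs a static pricing table with a thin client for the /responses endpoint. As a sanity check on the table: one high-quality 1536x1024 gpt-image-1 render is $0.25, so two such renders add 2 x 0.25 = $0.50 to the client's running image_costs. Below is a minimal usage sketch based only on the surface shown in this diff; the model name and prompt are illustrative, and an OPENAI_API_KEY is assumed to be set in the environment:

    from openai import OpenAI

    from autogen.oai.openai_responses import OpenAIResponsesClient, calculate_openai_image_cost

    # Sanity-check the pricing table (returns (0.25, None) for a valid combination).
    cost, error = calculate_openai_image_cost(model="gpt-image-1", size="1536x1024", quality="high")
    assert error is None and cost == 0.25

    # Minimal round trip: create() rewrites the classic `messages` list into the
    # Responses API `input` format and expands "web_search" into a built-in tool entry.
    client = OpenAIResponsesClient(OpenAI())
    response = client.create({
        "model": "gpt-4o",  # illustrative; any Responses-capable model
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "built_in_tools": ["web_search"],
    })
    print(client.message_retrieval(response)[0]["content"])
    print(client.get_usage(response))  # prompt/completion/total tokens plus reasoning tokens

Note that cost() adds the manually accumulated image costs to getattr(response, "cost", 0), so for a plain SDK Response the token portion is zero unless a cost attribute has been attached upstream.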
--- a/autogen/tools/experimental/__init__.py
+++ b/autogen/tools/experimental/__init__.py
@@ -6,6 +6,7 @@ from .browser_use import BrowserUseTool
  from .crawl4ai import Crawl4AITool
  from .deep_research import DeepResearchTool
  from .duckduckgo import DuckDuckGoSearchTool
+ from .firecrawl import FirecrawlTool
  from .google_search import GoogleSearchTool, YoutubeSearchTool
  from .messageplatform import (
      DiscordRetrieveTool,
@@ -18,6 +19,7 @@ from .messageplatform import (
  )
  from .perplexity import PerplexitySearchTool
  from .reliable import ReliableTool, ReliableToolError, SuccessfulExecutionParameters, ToolExecutionDetails
+ from .searxng import SearxngSearchTool
  from .tavily import TavilySearchTool
  from .web_search_preview import WebSearchPreviewTool
  from .wikipedia import WikipediaPageLoadTool, WikipediaQueryRunTool
@@ -29,10 +31,12 @@ __all__ = [
      "DiscordRetrieveTool",
      "DiscordSendTool",
      "DuckDuckGoSearchTool",
+     "FirecrawlTool",
      "GoogleSearchTool",
      "PerplexitySearchTool",
      "ReliableTool",
      "ReliableToolError",
+     "SearxngSearchTool",
      "SlackRetrieveRepliesTool",
      "SlackRetrieveTool",
      "SlackSendTool",
--- a/autogen/tools/experimental/browser_use/browser_use.py
+++ b/autogen/tools/experimental/browser_use/browser_use.py
@@ -78,7 +78,7 @@ class BrowserUseTool(Tool):
      def __init__(  # type: ignore[no-any-unimported]
          self,
          *,
-         llm_config: Union[LLMConfig, dict[str, Any]],
+         llm_config: Optional[Union[LLMConfig, dict[str, Any]]] = None,
          browser: Optional["Browser"] = None,
          agent_kwargs: Optional[dict[str, Any]] = None,
          browser_config: Optional[dict[str, Any]] = None,
@@ -86,17 +86,17 @@ class BrowserUseTool(Tool):
          """Use the browser to perform a task.

          Args:
-             llm_config: The LLM configuration.
+             llm_config: The LLM configuration. If None, the current LLMConfig from context is used.
              browser: The browser to use. If defined, browser_config must be None
              agent_kwargs: Additional keyword arguments to pass to the Agent
              browser_config: The browser configuration to use. If defined, browser must be None
          """
+         if llm_config is None:
+             llm_config = LLMConfig.current
          if agent_kwargs is None:
              agent_kwargs = {}
-
          if browser_config is None:
              browser_config = {}
-
          if browser is not None and browser_config:
              raise ValueError(
                  f"Cannot provide both browser and additional keyword parameters: {browser=}, {browser_config=}"
@@ -114,18 +114,13 @@ class BrowserUseTool(Tool):
              if browser is None:
                  # set default value for headless
                  headless = browser_config.pop("headless", True)
-
                  browser_config = BrowserConfig(headless=headless, **browser_config)
                  browser = Browser(config=browser_config)
-
              # set default value for generate_gif
              if "generate_gif" not in agent_kwargs:
                  agent_kwargs["generate_gif"] = False
-
              llm = LangChainChatModelFactory.create_base_chat_model(llm_config)
-
              max_steps = agent_kwargs.pop("max_steps", 100)
-
              agent = Agent(
                  task=task,
                  llm=llm,
@@ -133,9 +128,7 @@ class BrowserUseTool(Tool):
                  controller=BrowserUseTool._get_controller(llm_config),
                  **agent_kwargs,
              )
-
              result = await agent.run(max_steps=max_steps)
-
              extracted_content = [
                  ExtractedContent(content=content, url=url)
                  for content, url in zip(result.extracted_content(), result.urls())
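The net effect of these hunks is that llm_config becomes optional: when omitted, the tool falls back to LLMConfig.current, so constructing the tool inside an active LLMConfig context needs no explicit argument. A short sketch of both call styles, assuming the browser-use optional extra is installed (the model choice is illustrative):

    from autogen import LLMConfig
    from autogen.tools.experimental import BrowserUseTool

    llm_config = LLMConfig(api_type="openai", model="gpt-4o")

    # New in 0.9.5: inside the context manager, LLMConfig.current supplies the config.
    with llm_config:
        browser_tool = BrowserUseTool()

    # The pre-0.9.5 explicit form still works.
    browser_tool_explicit = BrowserUseTool(llm_config=llm_config)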
--- /dev/null
+++ b/autogen/tools/experimental/firecrawl/__init__.py
@@ -0,0 +1,7 @@
+ # Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
+ #
+ # SPDX-License-Identifier: Apache-2.0
+
+ from .firecrawl_tool import FirecrawlTool
+
+ __all__ = ["FirecrawlTool"]