ag2 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ag2 might be problematic; see the advisory details below.

Files changed (112)
  1. ag2-0.3.2.dist-info/LICENSE +201 -0
  2. ag2-0.3.2.dist-info/METADATA +490 -0
  3. ag2-0.3.2.dist-info/NOTICE.md +19 -0
  4. ag2-0.3.2.dist-info/RECORD +112 -0
  5. ag2-0.3.2.dist-info/WHEEL +5 -0
  6. ag2-0.3.2.dist-info/top_level.txt +1 -0
  7. autogen/__init__.py +17 -0
  8. autogen/_pydantic.py +116 -0
  9. autogen/agentchat/__init__.py +26 -0
  10. autogen/agentchat/agent.py +142 -0
  11. autogen/agentchat/assistant_agent.py +85 -0
  12. autogen/agentchat/chat.py +306 -0
  13. autogen/agentchat/contrib/__init__.py +0 -0
  14. autogen/agentchat/contrib/agent_builder.py +785 -0
  15. autogen/agentchat/contrib/agent_optimizer.py +450 -0
  16. autogen/agentchat/contrib/capabilities/__init__.py +0 -0
  17. autogen/agentchat/contrib/capabilities/agent_capability.py +21 -0
  18. autogen/agentchat/contrib/capabilities/generate_images.py +297 -0
  19. autogen/agentchat/contrib/capabilities/teachability.py +406 -0
  20. autogen/agentchat/contrib/capabilities/text_compressors.py +72 -0
  21. autogen/agentchat/contrib/capabilities/transform_messages.py +92 -0
  22. autogen/agentchat/contrib/capabilities/transforms.py +565 -0
  23. autogen/agentchat/contrib/capabilities/transforms_util.py +120 -0
  24. autogen/agentchat/contrib/capabilities/vision_capability.py +217 -0
  25. autogen/agentchat/contrib/gpt_assistant_agent.py +545 -0
  26. autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
  27. autogen/agentchat/contrib/graph_rag/document.py +24 -0
  28. autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +76 -0
  29. autogen/agentchat/contrib/graph_rag/graph_query_engine.py +50 -0
  30. autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +56 -0
  31. autogen/agentchat/contrib/img_utils.py +390 -0
  32. autogen/agentchat/contrib/llamaindex_conversable_agent.py +114 -0
  33. autogen/agentchat/contrib/llava_agent.py +176 -0
  34. autogen/agentchat/contrib/math_user_proxy_agent.py +471 -0
  35. autogen/agentchat/contrib/multimodal_conversable_agent.py +128 -0
  36. autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +325 -0
  37. autogen/agentchat/contrib/retrieve_assistant_agent.py +56 -0
  38. autogen/agentchat/contrib/retrieve_user_proxy_agent.py +701 -0
  39. autogen/agentchat/contrib/society_of_mind_agent.py +203 -0
  40. autogen/agentchat/contrib/text_analyzer_agent.py +76 -0
  41. autogen/agentchat/contrib/vectordb/__init__.py +0 -0
  42. autogen/agentchat/contrib/vectordb/base.py +243 -0
  43. autogen/agentchat/contrib/vectordb/chromadb.py +326 -0
  44. autogen/agentchat/contrib/vectordb/mongodb.py +559 -0
  45. autogen/agentchat/contrib/vectordb/pgvectordb.py +958 -0
  46. autogen/agentchat/contrib/vectordb/qdrant.py +334 -0
  47. autogen/agentchat/contrib/vectordb/utils.py +126 -0
  48. autogen/agentchat/contrib/web_surfer.py +305 -0
  49. autogen/agentchat/conversable_agent.py +2904 -0
  50. autogen/agentchat/groupchat.py +1666 -0
  51. autogen/agentchat/user_proxy_agent.py +109 -0
  52. autogen/agentchat/utils.py +207 -0
  53. autogen/browser_utils.py +291 -0
  54. autogen/cache/__init__.py +10 -0
  55. autogen/cache/abstract_cache_base.py +78 -0
  56. autogen/cache/cache.py +182 -0
  57. autogen/cache/cache_factory.py +85 -0
  58. autogen/cache/cosmos_db_cache.py +150 -0
  59. autogen/cache/disk_cache.py +109 -0
  60. autogen/cache/in_memory_cache.py +61 -0
  61. autogen/cache/redis_cache.py +128 -0
  62. autogen/code_utils.py +745 -0
  63. autogen/coding/__init__.py +22 -0
  64. autogen/coding/base.py +113 -0
  65. autogen/coding/docker_commandline_code_executor.py +262 -0
  66. autogen/coding/factory.py +45 -0
  67. autogen/coding/func_with_reqs.py +203 -0
  68. autogen/coding/jupyter/__init__.py +22 -0
  69. autogen/coding/jupyter/base.py +32 -0
  70. autogen/coding/jupyter/docker_jupyter_server.py +164 -0
  71. autogen/coding/jupyter/embedded_ipython_code_executor.py +182 -0
  72. autogen/coding/jupyter/jupyter_client.py +224 -0
  73. autogen/coding/jupyter/jupyter_code_executor.py +161 -0
  74. autogen/coding/jupyter/local_jupyter_server.py +168 -0
  75. autogen/coding/local_commandline_code_executor.py +410 -0
  76. autogen/coding/markdown_code_extractor.py +44 -0
  77. autogen/coding/utils.py +57 -0
  78. autogen/exception_utils.py +46 -0
  79. autogen/extensions/__init__.py +0 -0
  80. autogen/formatting_utils.py +76 -0
  81. autogen/function_utils.py +362 -0
  82. autogen/graph_utils.py +148 -0
  83. autogen/io/__init__.py +15 -0
  84. autogen/io/base.py +105 -0
  85. autogen/io/console.py +43 -0
  86. autogen/io/websockets.py +213 -0
  87. autogen/logger/__init__.py +11 -0
  88. autogen/logger/base_logger.py +140 -0
  89. autogen/logger/file_logger.py +287 -0
  90. autogen/logger/logger_factory.py +29 -0
  91. autogen/logger/logger_utils.py +42 -0
  92. autogen/logger/sqlite_logger.py +459 -0
  93. autogen/math_utils.py +356 -0
  94. autogen/oai/__init__.py +33 -0
  95. autogen/oai/anthropic.py +428 -0
  96. autogen/oai/bedrock.py +600 -0
  97. autogen/oai/cerebras.py +264 -0
  98. autogen/oai/client.py +1148 -0
  99. autogen/oai/client_utils.py +167 -0
  100. autogen/oai/cohere.py +453 -0
  101. autogen/oai/completion.py +1216 -0
  102. autogen/oai/gemini.py +469 -0
  103. autogen/oai/groq.py +281 -0
  104. autogen/oai/mistral.py +279 -0
  105. autogen/oai/ollama.py +576 -0
  106. autogen/oai/openai_utils.py +810 -0
  107. autogen/oai/together.py +343 -0
  108. autogen/retrieve_utils.py +487 -0
  109. autogen/runtime_logging.py +163 -0
  110. autogen/token_count_utils.py +257 -0
  111. autogen/types.py +20 -0
  112. autogen/version.py +7 -0
@@ -0,0 +1,109 @@
1
+ # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ from typing import Callable, Dict, List, Literal, Optional, Union
8
+
9
+ from ..runtime_logging import log_new_agent, logging_enabled
10
+ from .conversable_agent import ConversableAgent
11
+
12
+
13
class UserProxyAgent(ConversableAgent):
    """(In preview) A proxy agent for the user, that can execute code and provide feedback to the other agents.

    UserProxyAgent is a subclass of ConversableAgent configured with `human_input_mode` to ALWAYS
    and `llm_config` to False. By default, the agent will prompt for human input every time a message is received.
    Code execution is enabled by default. LLM-based auto reply is disabled by default.
    To modify auto reply, register a method with [`register_reply`](conversable_agent#register_reply).
    To modify the way to get human input, override `get_human_input` method.
    To modify the way to execute code blocks, single code block, or function call, override `execute_code_blocks`,
    `run_code`, and `execute_function` methods respectively.
    """

    # Default UserProxyAgent.description values, based on human_input_mode
    DEFAULT_USER_PROXY_AGENT_DESCRIPTIONS = {
        "ALWAYS": "An attentive HUMAN user who can answer questions about the task, and can perform tasks such as running Python code or inputting command line commands at a Linux terminal and reporting back the execution results.",
        "TERMINATE": "A user that can run Python code or input command line commands at a Linux terminal and report back the execution results.",
        "NEVER": "A computer terminal that performs no other action than running Python scripts (provided to it quoted in ```python code blocks), or sh shell scripts (provided to it quoted in ```sh code blocks).",
    }

    def __init__(
        self,
        name: str,
        is_termination_msg: Optional[Callable[[Dict], bool]] = None,
        max_consecutive_auto_reply: Optional[int] = None,
        human_input_mode: Literal["ALWAYS", "TERMINATE", "NEVER"] = "ALWAYS",
        function_map: Optional[Dict[str, Callable]] = None,
        code_execution_config: Optional[Union[Dict, Literal[False]]] = None,
        default_auto_reply: Optional[Union[str, Dict]] = "",
        llm_config: Optional[Union[Dict, Literal[False]]] = False,
        system_message: Optional[Union[str, List]] = "",
        description: Optional[str] = None,
        **kwargs,
    ):
        """
        Args:
            name (str): name of the agent.
            is_termination_msg (function): a function that takes a message in the form of a dictionary
                and returns a boolean value indicating if this received message is a termination message.
                The dict can contain the following keys: "content", "role", "name", "function_call".
            max_consecutive_auto_reply (int): the maximum number of consecutive auto replies.
                default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case).
                The limit only plays a role when human_input_mode is not "ALWAYS".
            human_input_mode (str): whether to ask for human inputs every time a message is received.
                Possible values are "ALWAYS", "TERMINATE", "NEVER".
                (1) When "ALWAYS", the agent prompts for human input every time a message is received.
                    Under this mode, the conversation stops when the human input is "exit",
                    or when is_termination_msg is True and there is no human input.
                (2) When "TERMINATE", the agent only prompts for human input only when a termination message is received or
                    the number of auto reply reaches the max_consecutive_auto_reply.
                (3) When "NEVER", the agent will never prompt for human input. Under this mode, the conversation stops
                    when the number of auto reply reaches the max_consecutive_auto_reply or when is_termination_msg is True.
            function_map (dict[str, callable]): Mapping function names (passed to openai) to callable functions.
            code_execution_config (dict or False or None): config for the code execution.
                Defaults to None, which is treated as an empty dict (code execution enabled with default settings).
                To disable code execution, set to False. Otherwise, set to a dictionary with the following keys:
                - work_dir (Optional, str): The working directory for the code execution.
                    If None, a default working directory will be used.
                    The default working directory is the "extensions" directory under
                    "path_to_autogen".
                - use_docker (Optional, list, str or bool): The docker image to use for code execution.
                    Default is True, which means the code will be executed in a docker container. A default list of images will be used.
                    If a list or a str of image name(s) is provided, the code will be executed in a docker container
                    with the first image successfully pulled.
                    If False, the code will be executed in the current environment.
                    We strongly recommend using docker for code execution.
                - timeout (Optional, int): The maximum execution time in seconds.
                - last_n_messages (Experimental, Optional, int): The number of messages to look back for code execution. Default to 1.
            default_auto_reply (str or dict or None): the default auto reply message when no code execution or llm based reply is generated.
            llm_config (dict or False or None): llm inference configuration.
                Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
                for available options.
                Default to False, which disables llm-based auto reply.
                When set to None, will use self.DEFAULT_CONFIG, which defaults to False.
            system_message (str or List): system message for ChatCompletion inference.
                Only used when llm_config is not False. Use it to reprogram the agent.
            description (str): a short description of the agent. This description is used by other agents
                (e.g. the GroupChatManager) to decide when to call upon this agent. (Default: system_message)
            **kwargs (dict): Please refer to other kwargs in
                [ConversableAgent](conversable_agent#__init__).
        """
        # The previous signature used a shared mutable default (`{}`) for
        # code_execution_config, so every instance created with the default
        # referenced (and could mutate) the same dict. Map None to a fresh
        # dict per call instead; explicit dict/False arguments pass through
        # unchanged, preserving the documented behavior.
        if code_execution_config is None:
            code_execution_config = {}
        super().__init__(
            name=name,
            system_message=system_message,
            is_termination_msg=is_termination_msg,
            max_consecutive_auto_reply=max_consecutive_auto_reply,
            human_input_mode=human_input_mode,
            function_map=function_map,
            code_execution_config=code_execution_config,
            llm_config=llm_config,
            default_auto_reply=default_auto_reply,
            description=(
                description if description is not None else self.DEFAULT_USER_PROXY_AGENT_DESCRIPTIONS[human_input_mode]
            ),
            **kwargs,
        )

        # Record agent creation when runtime logging is active.
        if logging_enabled():
            log_new_agent(self, locals())
@@ -0,0 +1,207 @@
1
+ # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ import re
8
+ from typing import Any, Callable, Dict, List, Union
9
+
10
+ from .agent import Agent
11
+
12
+
13
def consolidate_chat_info(chat_info, uniform_sender=None) -> None:
    """Validate a chat spec, or a list of chat specs, in place.

    Every spec must name a recipient and (unless ``uniform_sender`` is
    supplied for all of them) a sender. The optional ``summary_method`` must
    be None, a callable, or one of the two recognized string values; when it
    is "reflection_with_llm", at least one side of the chat must carry an
    llm client.
    """
    specs = [chat_info] if isinstance(chat_info, dict) else chat_info
    for spec in specs:
        if uniform_sender is None:
            assert "sender" in spec, "sender must be provided."
            sender = spec["sender"]
        else:
            sender = uniform_sender
        assert "recipient" in spec, "recipient must be provided."
        method = spec.get("summary_method")
        method_ok = (
            method is None or isinstance(method, Callable) or method in ("last_msg", "reflection_with_llm")
        )
        assert (
            method_ok
        ), "summary_method must be a string chosen from 'reflection_with_llm' or 'last_msg' or a callable, or None."
        if method == "reflection_with_llm":
            # Reflection needs an LLM on at least one side of the chat.
            assert (
                sender.client is not None or spec["recipient"].client is not None
            ), "llm client must be set in either the recipient or sender when summary_method is reflection_with_llm."
33
+
34
+
35
def gather_usage_summary(agents: List["Agent"]) -> Dict[str, Dict[str, Any]]:
    r"""Gather usage summary from all agents.

    Args:
        agents: (list): List of agents.

    Returns:
        dictionary: A dictionary containing two keys:
            - "usage_including_cached_inference": Cost information on the total usage, including the tokens in cached inference.
            - "usage_excluding_cached_inference": Cost information on the usage of tokens, excluding the tokens in cache. No larger than "usage_including_cached_inference".

    Example:

    ```python
    {
        "usage_including_cached_inference" : {
            "total_cost": 0.0006090000000000001,
            "gpt-35-turbo": {
                    "cost": 0.0006090000000000001,
                    "prompt_tokens": 242,
                    "completion_tokens": 123,
                    "total_tokens": 365
            },
        },

        "usage_excluding_cached_inference" : {
            "total_cost": 0.0006090000000000001,
            "gpt-35-turbo": {
                    "cost": 0.0006090000000000001,
                    "prompt_tokens": 242,
                    "completion_tokens": 123,
                    "total_tokens": 365
            },
        }
    }
    ```

    Note:

    If none of the agents incurred any cost (not having a client), then the usage_including_cached_inference and usage_excluding_cached_inference will be `{'total_cost': 0}`.
    """
    # NOTE: the return annotation previously read Dict[Dict[str, Dict], ...],
    # which is not a valid mapping type (dict keys are strings here); it is
    # corrected to Dict[str, Dict[str, Any]] without any behavior change.

    def aggregate_summary(usage_summary: Dict[str, Any], agent_summary: Dict[str, Any]) -> None:
        # Fold one agent's per-model usage dict into the running totals.
        # A None summary (agent never called its client) contributes nothing.
        if agent_summary is None:
            return
        usage_summary["total_cost"] += agent_summary.get("total_cost", 0)
        for model, data in agent_summary.items():
            if model != "total_cost":
                if model not in usage_summary:
                    # First sighting of this model: copy so later merges do
                    # not mutate the agent's own summary dict.
                    usage_summary[model] = data.copy()
                else:
                    usage_summary[model]["cost"] += data.get("cost", 0)
                    usage_summary[model]["prompt_tokens"] += data.get("prompt_tokens", 0)
                    usage_summary[model]["completion_tokens"] += data.get("completion_tokens", 0)
                    usage_summary[model]["total_tokens"] += data.get("total_tokens", 0)

    usage_including_cached_inference = {"total_cost": 0}
    usage_excluding_cached_inference = {"total_cost": 0}

    for agent in agents:
        # Agents without an llm client contribute nothing.
        if getattr(agent, "client", None):
            aggregate_summary(usage_including_cached_inference, agent.client.total_usage_summary)
            aggregate_summary(usage_excluding_cached_inference, agent.client.actual_usage_summary)

    return {
        "usage_including_cached_inference": usage_including_cached_inference,
        "usage_excluding_cached_inference": usage_excluding_cached_inference,
    }
103
+
104
+
105
def parse_tags_from_content(tag: str, content: Union[str, List[Dict[str, Any]]]) -> List[Dict[str, Dict[str, str]]]:
    """Parses HTML style tags from message contents.

    The parsing is done by looking for patterns in the text that match the format of HTML tags. The tag to be parsed is
    specified as an argument to the function. The function looks for this tag in the text and extracts its content. The
    content of a tag is everything that is inside the tag, between the opening and closing angle brackets. The content
    can be a single string or a set of attribute-value pairs.

    Examples:
        <img http://example.com/image.png> -> [{"tag": "img", "attr": {"src": "http://example.com/image.png"}, "match": re.Match}]
        <audio text="Hello I'm a robot" prompt="whisper"> ->
            [{"tag": "audio", "attr": {"text": "Hello I'm a robot", "prompt": "whisper"}, "match": re.Match}]

    Args:
        tag (str): The HTML style tag to be parsed.
        content (Union[str, List[Dict[str, Any]]]): The message content to parse. Can be a string or a list of content
            items.

    Returns:
        List[Dict[str, str]]: A list of dictionaries, where each dictionary represents a parsed tag. Each dictionary
            contains three key-value pairs: 'type' which is the tag, 'attr' which is a dictionary of the parsed attributes,
            and 'match' which is a regular expression match object.

    Raises:
        ValueError: If the content is not a string or a list.
    """
    if isinstance(content, str):
        return list(_parse_tags_from_text(tag, content))

    # Multimodal messages arrive as a list of typed items; only the text
    # items can contain tags.
    if isinstance(content, list):
        parsed: List[Dict[str, Dict[str, str]]] = []
        for item in content:
            if item.get("type") == "text":
                parsed.extend(_parse_tags_from_text(tag, item["text"]))
        return parsed

    raise ValueError(f"content must be str or list, but got {type(content)}")
143
+
144
+
145
+ def _parse_tags_from_text(tag: str, text: str) -> List[Dict[str, str]]:
146
+ pattern = re.compile(f"<{tag} (.*?)>")
147
+
148
+ results = []
149
+ for match in re.finditer(pattern, text):
150
+ tag_attr = match.group(1).strip()
151
+ attr = _parse_attributes_from_tags(tag_attr)
152
+
153
+ results.append({"tag": tag, "attr": attr, "match": match})
154
+ return results
155
+
156
+
157
def _parse_attributes_from_tags(tag_content: str):
    """Turn the interior of a tag (e.g. ``src='a.png' alt='x'``) into a dict.

    Tokens carrying a quoted ``key='value'`` pair become dict entries with the
    quotes stripped; any other token (bare words, unquoted ``key=value``) is
    folded into the "src" attribute, space-separated.
    """
    tokens = _reconstruct_attributes(re.findall(r"([^ ]+)", tag_content))

    def _fold_into_src(acc, fragment):
        # Unquoted fragments accumulate (space-separated) under "src".
        if "src" in acc:
            acc["src"] += f" {fragment}"
        else:
            acc["src"] = fragment

    parsed = {}
    for token in tokens:
        if "=" not in token:
            _fold_into_src(parsed, token)
            continue

        name, value = token.split("=", 1)
        if value.startswith("'") or value.startswith('"'):
            parsed[name] = value[1:-1]  # remove quotes
        else:
            # Unquoted value: treat the whole token as a src fragment.
            _fold_into_src(parsed, token)

    return parsed
181
+
182
+
183
+ def _reconstruct_attributes(attrs: List[str]) -> List[str]:
184
+ """Reconstructs attributes from a list of strings where some attributes may be split across multiple elements."""
185
+
186
+ def is_attr(attr: str) -> bool:
187
+ if "=" in attr:
188
+ _, value = attr.split("=", 1)
189
+ if value.startswith("'") or value.startswith('"'):
190
+ return True
191
+ return False
192
+
193
+ reconstructed = []
194
+ found_attr = False
195
+ for attr in attrs:
196
+ if is_attr(attr):
197
+ reconstructed.append(attr)
198
+ found_attr = True
199
+ else:
200
+ if found_attr:
201
+ reconstructed[-1] += f" {attr}"
202
+ found_attr = True
203
+ elif reconstructed:
204
+ reconstructed[-1] += f" {attr}"
205
+ else:
206
+ reconstructed.append(attr)
207
+ return reconstructed
@@ -0,0 +1,291 @@
1
+ # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ import io
8
+ import json
9
+ import mimetypes
10
+ import os
11
+ import re
12
+ import uuid
13
+ from typing import Any, Dict, List, Optional, Tuple, Union
14
+ from urllib.parse import urljoin, urlparse
15
+
16
+ import markdownify
17
+ import requests
18
+ from bs4 import BeautifulSoup
19
+
20
+ # Optional PDF support
21
+ IS_PDF_CAPABLE = False
22
+ try:
23
+ import pdfminer
24
+ import pdfminer.high_level
25
+
26
+ IS_PDF_CAPABLE = True
27
+ except ModuleNotFoundError:
28
+ pass
29
+
30
+ # Other optional dependencies
31
+ try:
32
+ import pathvalidate
33
+ except ModuleNotFoundError:
34
+ pass
35
+
36
+
37
class SimpleTextBrowser:
    """(In preview) An extremely simple text-based web browser comparable to Lynx. Suitable for Agentic use."""

    def __init__(
        self,
        start_page: Optional[str] = None,
        viewport_size: Optional[int] = 1024 * 8,
        downloads_folder: Optional[Union[str, None]] = None,
        bing_base_url: str = "https://api.bing.microsoft.com/v7.0/search",
        bing_api_key: Optional[Union[str, None]] = None,
        request_kwargs: Optional[Union[Dict[str, Any], None]] = None,
    ):
        """
        Args:
            start_page: First page to load. Defaults to "about:blank" (no network access on construction).
            viewport_size: Number of characters shown per viewport "page" of a fetched document.
            downloads_folder: Where unsupported content types are saved; if None, such content is not downloaded.
            bing_base_url: Endpoint used for "bing:" search queries.
            bing_api_key: Subscription key for the Bing API; required only when searching.
            request_kwargs: Extra keyword arguments forwarded to every requests.get() call (headers, proxies, ...).
        """
        self.start_page: str = start_page if start_page else "about:blank"
        self.viewport_size = viewport_size  # Applies only to the standard uri types
        self.downloads_folder = downloads_folder
        self.history: List[str] = list()  # every address visited, newest last
        self.page_title: Optional[str] = None
        self.viewport_current_page = 0  # index into self.viewport_pages
        self.viewport_pages: List[Tuple[int, int]] = list()  # (start, end) character offsets into the page text
        self.set_address(self.start_page)
        self.bing_base_url = bing_base_url
        self.bing_api_key = bing_api_key
        self.request_kwargs = request_kwargs

        # NOTE(review): set_address() above already initialized page content via
        # _set_page_content(); for the default "about:blank" start page this
        # reassignment is a no-op, but for a custom start_page it clears the
        # just-fetched text — confirm intended.
        self._page_content = ""

    @property
    def address(self) -> str:
        """Return the address of the current page."""
        return self.history[-1]

    def set_address(self, uri_or_path: str) -> None:
        """Append *uri_or_path* to history and load it.

        Special URIs: "about:blank" clears the page; "bing: <query>" runs a
        Bing search. Anything else is fetched over HTTP(S), resolving
        relative paths against the current address. Resets the viewport to
        the first page.
        """
        self.history.append(uri_or_path)

        # Handle special URIs
        if uri_or_path == "about:blank":
            self._set_page_content("")
        elif uri_or_path.startswith("bing:"):
            self._bing_search(uri_or_path[len("bing:") :].strip())
        else:
            if not uri_or_path.startswith("http:") and not uri_or_path.startswith("https:"):
                # NOTE(review): self.address is the entry just appended above,
                # so a relative path is joined against itself rather than the
                # previous page — confirm this matches the intended behavior.
                uri_or_path = urljoin(self.address, uri_or_path)
                self.history[-1] = uri_or_path  # Update the address with the fully-qualified path
            self._fetch_page(uri_or_path)

        self.viewport_current_page = 0

    @property
    def viewport(self) -> str:
        """Return the content of the current viewport."""
        bounds = self.viewport_pages[self.viewport_current_page]
        return self.page_content[bounds[0] : bounds[1]]

    @property
    def page_content(self) -> str:
        """Return the full contents of the current page."""
        return self._page_content

    def _set_page_content(self, content: str) -> None:
        """Sets the text content of the current page."""
        self._page_content = content
        self._split_pages()
        # Clamp the cursor in case the new content has fewer pages.
        if self.viewport_current_page >= len(self.viewport_pages):
            self.viewport_current_page = len(self.viewport_pages) - 1

    def page_down(self) -> None:
        """Advance one viewport page, stopping at the last page."""
        self.viewport_current_page = min(self.viewport_current_page + 1, len(self.viewport_pages) - 1)

    def page_up(self) -> None:
        """Go back one viewport page, stopping at the first page."""
        self.viewport_current_page = max(self.viewport_current_page - 1, 0)

    def visit_page(self, path_or_uri: str) -> str:
        """Update the address, visit the page, and return the content of the viewport."""
        self.set_address(path_or_uri)
        return self.viewport

    def _split_pages(self) -> None:
        """Recompute self.viewport_pages as (start, end) offsets into the page text."""
        # Split only regular pages
        if not self.address.startswith("http:") and not self.address.startswith("https:"):
            self.viewport_pages = [(0, len(self._page_content))]
            return

        # Handle empty pages
        if len(self._page_content) == 0:
            self.viewport_pages = [(0, 0)]
            return

        # Break the viewport into pages
        self.viewport_pages = []
        start_idx = 0
        while start_idx < len(self._page_content):
            end_idx = min(start_idx + self.viewport_size, len(self._page_content))  # type: ignore[operator]
            # Adjust to end on a space
            while end_idx < len(self._page_content) and self._page_content[end_idx - 1] not in [" ", "\t", "\r", "\n"]:
                end_idx += 1
            self.viewport_pages.append((start_idx, end_idx))
            start_idx = end_idx

    def _bing_api_call(self, query: str) -> Dict[str, Dict[str, List[Dict[str, Union[str, Dict[str, str]]]]]]:
        """Issue *query* against the Bing Web Search API and return the raw JSON response.

        Raises:
            ValueError: If no Bing API key was configured.
            requests.HTTPError: If the API responds with an error status.
        """
        # Make sure the key was set
        if self.bing_api_key is None:
            raise ValueError("Missing Bing API key.")

        # Prepare the request parameters
        request_kwargs = self.request_kwargs.copy() if self.request_kwargs is not None else {}

        if "headers" not in request_kwargs:
            request_kwargs["headers"] = {}
        request_kwargs["headers"]["Ocp-Apim-Subscription-Key"] = self.bing_api_key

        if "params" not in request_kwargs:
            request_kwargs["params"] = {}
        request_kwargs["params"]["q"] = query
        request_kwargs["params"]["textDecorations"] = False
        request_kwargs["params"]["textFormat"] = "raw"

        request_kwargs["stream"] = False

        # Make the request
        response = requests.get(self.bing_base_url, **request_kwargs)
        response.raise_for_status()
        results = response.json()

        return results  # type: ignore[no-any-return]

    def _bing_search(self, query: str) -> None:
        """Run a Bing search and render the results as the current page (markdown list)."""
        results = self._bing_api_call(query)

        # Number web results, deep links, and news results with one shared counter.
        web_snippets: List[str] = list()
        idx = 0
        for page in results["webPages"]["value"]:
            idx += 1
            web_snippets.append(f"{idx}. [{page['name']}]({page['url']})\n{page['snippet']}")
            if "deepLinks" in page:
                for dl in page["deepLinks"]:
                    idx += 1
                    web_snippets.append(
                        f"{idx}. [{dl['name']}]({dl['url']})\n{dl['snippet'] if 'snippet' in dl else ''}"  # type: ignore[index]
                    )

        news_snippets = list()
        if "news" in results:
            for page in results["news"]["value"]:
                idx += 1
                news_snippets.append(f"{idx}. [{page['name']}]({page['url']})\n{page['description']}")

        self.page_title = f"{query} - Search"

        content = (
            f"A Bing search for '{query}' found {len(web_snippets) + len(news_snippets)} results:\n\n## Web Results\n"
            + "\n\n".join(web_snippets)
        )
        if len(news_snippets) > 0:
            content += "\n\n## News Results:\n" + "\n\n".join(news_snippets)
        self._set_page_content(content)

    def _fetch_page(self, url: str) -> None:
        """Fetch *url* and render it as the current page.

        HTML is converted to markdown (Wikipedia articles get special
        handling), plain text is shown as-is, PDFs are extracted when
        pdfminer is available, and other content types are saved to
        downloads_folder when one is configured. Request failures become an
        error page rather than raising.
        """
        try:
            # Prepare the request parameters
            request_kwargs = self.request_kwargs.copy() if self.request_kwargs is not None else {}
            request_kwargs["stream"] = True

            # Send a HTTP request to the URL
            response = requests.get(url, **request_kwargs)
            response.raise_for_status()

            # If the HTTP request returns a status code 200, proceed
            if response.status_code == 200:
                # Normalize the content type to one of the three handled values.
                content_type = response.headers.get("content-type", "")
                for ct in ["text/html", "text/plain", "application/pdf"]:
                    if ct in content_type.lower():
                        content_type = ct
                        break

                if content_type == "text/html":
                    # Get the content of the response
                    html = ""
                    for chunk in response.iter_content(chunk_size=512, decode_unicode=True):
                        html += chunk

                    soup = BeautifulSoup(html, "html.parser")

                    # Remove javascript and style blocks
                    for script in soup(["script", "style"]):
                        script.extract()

                    # Convert to markdown -- Wikipedia gets special attention to get a clean version of the page
                    if url.startswith("https://en.wikipedia.org/"):
                        body_elm = soup.find("div", {"id": "mw-content-text"})
                        title_elm = soup.find("span", {"class": "mw-page-title-main"})

                        if body_elm:
                            # What's the title
                            main_title = soup.title.string
                            if title_elm and len(title_elm) > 0:
                                main_title = title_elm.string
                            webpage_text = (
                                "# " + main_title + "\n\n" + markdownify.MarkdownConverter().convert_soup(body_elm)
                            )
                        else:
                            webpage_text = markdownify.MarkdownConverter().convert_soup(soup)
                    else:
                        webpage_text = markdownify.MarkdownConverter().convert_soup(soup)

                    # Convert newlines
                    webpage_text = re.sub(r"\r\n", "\n", webpage_text)

                    # Remove excessive blank lines
                    self.page_title = soup.title.string
                    self._set_page_content(re.sub(r"\n{2,}", "\n\n", webpage_text).strip())
                elif content_type == "text/plain":
                    # Get the content of the response
                    plain_text = ""
                    for chunk in response.iter_content(chunk_size=512, decode_unicode=True):
                        plain_text += chunk

                    self.page_title = None
                    self._set_page_content(plain_text)
                elif IS_PDF_CAPABLE and content_type == "application/pdf":
                    # pdfminer is optional; this branch only runs when it imported.
                    pdf_data = io.BytesIO(response.raw.read())
                    self.page_title = None
                    self._set_page_content(pdfminer.high_level.extract_text(pdf_data))
                elif self.downloads_folder is not None:
                    # Try producing a safe filename
                    fname = None
                    try:
                        fname = pathvalidate.sanitize_filename(os.path.basename(urlparse(url).path)).strip()
                    except NameError:
                        # pathvalidate is optional; fall through to a generated name.
                        pass

                    # No suitable name, so make one
                    if fname is None:
                        extension = mimetypes.guess_extension(content_type)
                        if extension is None:
                            extension = ".download"
                        fname = str(uuid.uuid4()) + extension

                    # Open a file for writing
                    download_path = os.path.abspath(os.path.join(self.downloads_folder, fname))
                    with open(download_path, "wb") as fh:
                        for chunk in response.iter_content(chunk_size=512):
                            fh.write(chunk)

                    # Return a page describing what just happened
                    self.page_title = "Download complete."
                    self._set_page_content(f"Downloaded '{url}' to '{download_path}'.")
                else:
                    self.page_title = f"Error - Unsupported Content-Type '{content_type}'"
                    self._set_page_content(self.page_title)
            else:
                # raise_for_status() above handles 4xx/5xx, so this covers other
                # non-200 success codes (e.g. 204).
                self.page_title = "Error"
                self._set_page_content("Failed to retrieve " + url)
        except requests.exceptions.RequestException as e:
            # Network / HTTP errors become an error page instead of propagating.
            self.page_title = "Error"
            self._set_page_content(str(e))
@@ -0,0 +1,10 @@
1
+ # Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ # Portions derived from https://github.com/microsoft/autogen are under the MIT License.
6
+ # SPDX-License-Identifier: MIT
7
+ from .abstract_cache_base import AbstractCache
8
+ from .cache import Cache
9
+
10
+ __all__ = ["Cache", "AbstractCache"]