ag2 0.4.1__py3-none-any.whl → 0.5.0b2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in that registry.
Potentially problematic release: this version of ag2 might be problematic.
- {ag2-0.4.1.dist-info → ag2-0.5.0b2.dist-info}/METADATA +5 -146
- ag2-0.5.0b2.dist-info/RECORD +6 -0
- ag2-0.5.0b2.dist-info/top_level.txt +1 -0
- ag2-0.4.1.dist-info/RECORD +0 -158
- ag2-0.4.1.dist-info/top_level.txt +0 -1
- autogen/__init__.py +0 -17
- autogen/_pydantic.py +0 -116
- autogen/agentchat/__init__.py +0 -42
- autogen/agentchat/agent.py +0 -142
- autogen/agentchat/assistant_agent.py +0 -85
- autogen/agentchat/chat.py +0 -306
- autogen/agentchat/contrib/__init__.py +0 -0
- autogen/agentchat/contrib/agent_builder.py +0 -788
- autogen/agentchat/contrib/agent_eval/agent_eval.py +0 -107
- autogen/agentchat/contrib/agent_eval/criterion.py +0 -47
- autogen/agentchat/contrib/agent_eval/critic_agent.py +0 -47
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +0 -42
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +0 -48
- autogen/agentchat/contrib/agent_eval/task.py +0 -43
- autogen/agentchat/contrib/agent_optimizer.py +0 -450
- autogen/agentchat/contrib/capabilities/__init__.py +0 -0
- autogen/agentchat/contrib/capabilities/agent_capability.py +0 -21
- autogen/agentchat/contrib/capabilities/generate_images.py +0 -297
- autogen/agentchat/contrib/capabilities/teachability.py +0 -406
- autogen/agentchat/contrib/capabilities/text_compressors.py +0 -72
- autogen/agentchat/contrib/capabilities/transform_messages.py +0 -92
- autogen/agentchat/contrib/capabilities/transforms.py +0 -565
- autogen/agentchat/contrib/capabilities/transforms_util.py +0 -120
- autogen/agentchat/contrib/capabilities/vision_capability.py +0 -217
- autogen/agentchat/contrib/captainagent/tools/__init__.py +0 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +0 -41
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +0 -31
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +0 -26
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +0 -55
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +0 -54
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +0 -39
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +0 -35
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +0 -61
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +0 -62
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +0 -48
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +0 -34
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +0 -36
- autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +0 -22
- autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +0 -19
- autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +0 -32
- autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +0 -17
- autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +0 -26
- autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +0 -24
- autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +0 -28
- autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +0 -29
- autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +0 -35
- autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +0 -40
- autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +0 -23
- autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +0 -37
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +0 -16
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +0 -16
- autogen/agentchat/contrib/captainagent/tools/requirements.txt +0 -10
- autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +0 -34
- autogen/agentchat/contrib/captainagent.py +0 -490
- autogen/agentchat/contrib/gpt_assistant_agent.py +0 -545
- autogen/agentchat/contrib/graph_rag/__init__.py +0 -0
- autogen/agentchat/contrib/graph_rag/document.py +0 -30
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +0 -111
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +0 -81
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +0 -56
- autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +0 -64
- autogen/agentchat/contrib/img_utils.py +0 -390
- autogen/agentchat/contrib/llamaindex_conversable_agent.py +0 -123
- autogen/agentchat/contrib/llava_agent.py +0 -176
- autogen/agentchat/contrib/math_user_proxy_agent.py +0 -471
- autogen/agentchat/contrib/multimodal_conversable_agent.py +0 -128
- autogen/agentchat/contrib/qdrant_retrieve_user_proxy_agent.py +0 -325
- autogen/agentchat/contrib/retrieve_assistant_agent.py +0 -56
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +0 -705
- autogen/agentchat/contrib/society_of_mind_agent.py +0 -203
- autogen/agentchat/contrib/swarm_agent.py +0 -463
- autogen/agentchat/contrib/text_analyzer_agent.py +0 -76
- autogen/agentchat/contrib/tool_retriever.py +0 -120
- autogen/agentchat/contrib/vectordb/__init__.py +0 -0
- autogen/agentchat/contrib/vectordb/base.py +0 -243
- autogen/agentchat/contrib/vectordb/chromadb.py +0 -326
- autogen/agentchat/contrib/vectordb/mongodb.py +0 -559
- autogen/agentchat/contrib/vectordb/pgvectordb.py +0 -958
- autogen/agentchat/contrib/vectordb/qdrant.py +0 -334
- autogen/agentchat/contrib/vectordb/utils.py +0 -126
- autogen/agentchat/contrib/web_surfer.py +0 -305
- autogen/agentchat/conversable_agent.py +0 -2908
- autogen/agentchat/groupchat.py +0 -1668
- autogen/agentchat/user_proxy_agent.py +0 -109
- autogen/agentchat/utils.py +0 -207
- autogen/browser_utils.py +0 -291
- autogen/cache/__init__.py +0 -10
- autogen/cache/abstract_cache_base.py +0 -78
- autogen/cache/cache.py +0 -182
- autogen/cache/cache_factory.py +0 -85
- autogen/cache/cosmos_db_cache.py +0 -150
- autogen/cache/disk_cache.py +0 -109
- autogen/cache/in_memory_cache.py +0 -61
- autogen/cache/redis_cache.py +0 -128
- autogen/code_utils.py +0 -745
- autogen/coding/__init__.py +0 -22
- autogen/coding/base.py +0 -113
- autogen/coding/docker_commandline_code_executor.py +0 -262
- autogen/coding/factory.py +0 -45
- autogen/coding/func_with_reqs.py +0 -203
- autogen/coding/jupyter/__init__.py +0 -22
- autogen/coding/jupyter/base.py +0 -32
- autogen/coding/jupyter/docker_jupyter_server.py +0 -164
- autogen/coding/jupyter/embedded_ipython_code_executor.py +0 -182
- autogen/coding/jupyter/jupyter_client.py +0 -224
- autogen/coding/jupyter/jupyter_code_executor.py +0 -161
- autogen/coding/jupyter/local_jupyter_server.py +0 -168
- autogen/coding/local_commandline_code_executor.py +0 -410
- autogen/coding/markdown_code_extractor.py +0 -44
- autogen/coding/utils.py +0 -57
- autogen/exception_utils.py +0 -46
- autogen/extensions/__init__.py +0 -0
- autogen/formatting_utils.py +0 -76
- autogen/function_utils.py +0 -362
- autogen/graph_utils.py +0 -148
- autogen/io/__init__.py +0 -15
- autogen/io/base.py +0 -105
- autogen/io/console.py +0 -43
- autogen/io/websockets.py +0 -213
- autogen/logger/__init__.py +0 -11
- autogen/logger/base_logger.py +0 -140
- autogen/logger/file_logger.py +0 -287
- autogen/logger/logger_factory.py +0 -29
- autogen/logger/logger_utils.py +0 -42
- autogen/logger/sqlite_logger.py +0 -459
- autogen/math_utils.py +0 -356
- autogen/oai/__init__.py +0 -33
- autogen/oai/anthropic.py +0 -428
- autogen/oai/bedrock.py +0 -606
- autogen/oai/cerebras.py +0 -270
- autogen/oai/client.py +0 -1148
- autogen/oai/client_utils.py +0 -167
- autogen/oai/cohere.py +0 -453
- autogen/oai/completion.py +0 -1216
- autogen/oai/gemini.py +0 -469
- autogen/oai/groq.py +0 -281
- autogen/oai/mistral.py +0 -279
- autogen/oai/ollama.py +0 -582
- autogen/oai/openai_utils.py +0 -811
- autogen/oai/together.py +0 -343
- autogen/retrieve_utils.py +0 -487
- autogen/runtime_logging.py +0 -163
- autogen/token_count_utils.py +0 -259
- autogen/types.py +0 -20
- autogen/version.py +0 -7
- {ag2-0.4.1.dist-info → ag2-0.5.0b2.dist-info}/LICENSE +0 -0
- {ag2-0.4.1.dist-info → ag2-0.5.0b2.dist-info}/NOTICE.md +0 -0
- {ag2-0.4.1.dist-info → ag2-0.5.0b2.dist-info}/WHEEL +0 -0
autogen/agentchat/contrib/capabilities/vision_capability.py
DELETED
@@ -1,217 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-#
-# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
-# SPDX-License-Identifier: MIT
-import copy
-from typing import Callable, Dict, List, Optional, Union
-
-from autogen.agentchat.assistant_agent import ConversableAgent
-from autogen.agentchat.contrib.capabilities.agent_capability import AgentCapability
-from autogen.agentchat.contrib.img_utils import (
-    convert_base64_to_data_uri,
-    get_image_data,
-    get_pil_image,
-    gpt4v_formatter,
-    message_formatter_pil_to_b64,
-)
-from autogen.agentchat.contrib.multimodal_conversable_agent import MultimodalConversableAgent
-from autogen.agentchat.conversable_agent import colored
-from autogen.code_utils import content_str
-from autogen.oai.client import OpenAIWrapper
-
-DEFAULT_DESCRIPTION_PROMPT = (
-    "Write a detailed caption for this image. "
-    "Pay special attention to any details that might be useful or relevant "
-    "to the ongoing conversation."
-)
-
-
-class VisionCapability(AgentCapability):
-    """We can add vision capability to regular ConversableAgent, even if the agent does not have the multimodal capability,
-    such as GPT-3.5-turbo agent, Llama, Orca, or Mistral agents. This vision capability will invoke a LMM client to describe
-    the image (captioning) before sending the information to the agent's actual client.
-
-    The vision capability will hook to the ConversableAgent's `process_last_received_message`.
-
-    Some technical details:
-    When the agent (who has the vision capability) received an message, it will:
-    1. _process_received_message:
-        a. _append_oai_message
-    2. generate_reply: if the agent is a MultimodalAgent, it will also use the image tag.
-        a. hook process_last_received_message (NOTE: this is where the vision capability will be hooked to.)
-        b. hook process_all_messages_before_reply
-    3. send:
-        a. hook process_message_before_send
-        b. _append_oai_message
-    """
-
-    def __init__(
-        self,
-        lmm_config: Dict,
-        description_prompt: Optional[str] = DEFAULT_DESCRIPTION_PROMPT,
-        custom_caption_func: Callable = None,
-    ) -> None:
-        """
-        Initializes a new instance, setting up the configuration for interacting with
-        a Language Multimodal (LMM) client and specifying optional parameters for image
-        description and captioning.
-
-        Args:
-            lmm_config (Dict): Configuration for the LMM client, which is used to call
-                the LMM service for describing the image. This must be a dictionary containing
-                the necessary configuration parameters. If `lmm_config` is False or an empty dictionary,
-                it is considered invalid, and initialization will assert.
-            description_prompt (Optional[str], optional): The prompt to use for generating
-                descriptions of the image. This parameter allows customization of the
-                prompt passed to the LMM service. Defaults to `DEFAULT_DESCRIPTION_PROMPT` if not provided.
-            custom_caption_func (Callable, optional): A callable that, if provided, will be used
-                to generate captions for images. This allows for custom captioning logic outside
-                of the standard LMM service interaction.
-                The callable should take three parameters as input:
-                    1. an image URL (or local location)
-                    2. image_data (a PIL image)
-                    3. lmm_client (to call remote LMM)
-                and then return a description (as string).
-                If not provided, captioning will rely on the LMM client configured via `lmm_config`.
-                If provided, we will not run the default self._get_image_caption method.
-
-        Raises:
-            AssertionError: If neither a valid `lmm_config` nor a `custom_caption_func` is provided,
-                an AssertionError is raised to indicate that the Vision Capability requires
-                one of these to be valid for operation.
-        """
-        self._lmm_config = lmm_config
-        self._description_prompt = description_prompt
-        self._parent_agent = None
-
-        if lmm_config:
-            self._lmm_client = OpenAIWrapper(**lmm_config)
-        else:
-            self._lmm_client = None
-
-        self._custom_caption_func = custom_caption_func
-        assert (
-            self._lmm_config or custom_caption_func
-        ), "Vision Capability requires a valid lmm_config or custom_caption_func."
-
-    def add_to_agent(self, agent: ConversableAgent) -> None:
-        self._parent_agent = agent
-
-        # Append extra info to the system message.
-        agent.update_system_message(agent.system_message + "\nYou've been given the ability to interpret images.")
-
-        # Register a hook for processing the last message.
-        agent.register_hook(hookable_method="process_last_received_message", hook=self.process_last_received_message)
-
-    def process_last_received_message(self, content: Union[str, List[dict]]) -> str:
-        """
-        Processes the last received message content by normalizing and augmenting it
-        with descriptions of any included images. The function supports input content
-        as either a string or a list of dictionaries, where each dictionary represents
-        a content item (e.g., text, image). If the content contains image URLs, it
-        fetches the image data, generates a caption for each image, and inserts the
-        caption into the augmented content.
-
-        The function aims to transform the content into a format compatible with GPT-4V
-        multimodal inputs, specifically by formatting strings into PIL-compatible
-        images if needed and appending text descriptions for images. This allows for
-        a more accessible presentation of the content, especially in contexts where
-        images cannot be displayed directly.
-
-        Args:
-            content (Union[str, List[dict]]): The last received message content, which
-                can be a plain text string or a list of dictionaries representing
-                different types of content items (e.g., text, image_url).
-
-        Returns:
-            str: The augmented message content
-
-        Raises:
-            AssertionError: If an item in the content list is not a dictionary.
-
-        Examples:
-            Assuming `self._get_image_caption(img_data)` returns
-            "A beautiful sunset over the mountains" for the image.
-
-            - Input as String:
-                content = "Check out this cool photo!"
-                Output: "Check out this cool photo!"
-                (Content is a string without an image, remains unchanged.)
-
-            - Input as String, with image location:
-                content = "What's weather in this cool photo: <img http://example.com/photo.jpg>"
-                Output: "What's weather in this cool photo: <img http://example.com/photo.jpg> in case you can not see, the caption of this image is:
-                A beautiful sunset over the mountains\n"
-                (Caption added after the image)
-
-            - Input as List with Text Only:
-                content = [{"type": "text", "text": "Here's an interesting fact."}]
-                Output: "Here's an interesting fact."
-                (No images in the content, it remains unchanged.)
-
-            - Input as List with Image URL:
-                content = [
-                    {"type": "text", "text": "What's weather in this cool photo:"},
-                    {"type": "image_url", "image_url": {"url": "http://example.com/photo.jpg"}}
-                ]
-                Output: "What's weather in this cool photo: <img http://example.com/photo.jpg> in case you can not see, the caption of this image is:
-                A beautiful sunset over the mountains\n"
-                (Caption added after the image)
-        """
-        copy.deepcopy(content)
-        # normalize the content into the gpt-4v format for multimodal
-        # we want to keep the URL format to keep it concise.
-        if isinstance(content, str):
-            content = gpt4v_formatter(content, img_format="url")
-
-        aug_content: str = ""
-        for item in content:
-            assert isinstance(item, dict)
-            if item["type"] == "text":
-                aug_content += item["text"]
-            elif item["type"] == "image_url":
-                img_url = item["image_url"]["url"]
-                img_caption = ""
-
-                if self._custom_caption_func:
-                    img_caption = self._custom_caption_func(img_url, get_pil_image(img_url), self._lmm_client)
-                elif self._lmm_client:
-                    img_data = get_image_data(img_url)
-                    img_caption = self._get_image_caption(img_data)
-                else:
-                    img_caption = ""
-
-                aug_content += f"<img {img_url}> in case you can not see, the caption of this image is: {img_caption}\n"
-            else:
-                print(f"Warning: the input type should either be `test` or `image_url`. Skip {item['type']} here.")
-
-        return aug_content
-
-    def _get_image_caption(self, img_data: str) -> str:
-        """
-        Args:
-            img_data (str): base64 encoded image data.
-        Returns:
-            str: caption for the given image.
-        """
-        response = self._lmm_client.create(
-            context=None,
-            messages=[
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": self._description_prompt},
-                        {
-                            "type": "image_url",
-                            "image_url": {
-                                "url": convert_base64_to_data_uri(img_data),
-                            },
-                        },
-                    ],
-                }
-            ],
-        )
-        description = response.choices[0].message.content
-        return content_str(description)
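For context (not part of the diff itself), a minimal sketch of how the removed VisionCapability was typically attached to a text-only agent under the 0.4.1 API shown above; the model name and API key are placeholders:

    from autogen import ConversableAgent
    from autogen.agentchat.contrib.capabilities.vision_capability import VisionCapability

    # Placeholder LMM config in the usual OpenAIWrapper "config_list" shape.
    lmm_config = {"config_list": [{"model": "gpt-4-vision-preview", "api_key": "YOUR_API_KEY"}]}

    agent = ConversableAgent(name="assistant", llm_config=False)
    VisionCapability(lmm_config=lmm_config).add_to_agent(agent)
    # add_to_agent() extends the system message and registers the
    # process_last_received_message hook, so incoming <img ...> tags get captioned.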
autogen/agentchat/contrib/captainagent/tools/__init__.py
DELETED (file without changes: the file was empty)
autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py
DELETED
@@ -1,41 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-def calculate_correlation(csv_path: str, column1: str, column2: str, method: str = "pearson") -> float:
-    """
-    Calculate the correlation between two columns in a CSV file.
-
-    Args:
-        csv_path (str): The path to the CSV file.
-        column1 (str): The name of the first column.
-        column2 (str): The name of the second column.
-        method (str or callable, optional): The method used to calculate the correlation.
-            - 'pearson' (default): Pearson correlation coefficient.
-            - 'kendall': Kendall Tau correlation coefficient.
-            - 'spearman': Spearman rank correlation coefficient.
-            - callable: A custom correlation function that takes two arrays and returns a scalar.
-
-    Returns:
-        float: The correlation coefficient between the two columns.
-    """
-    import pandas as pd
-
-    # Read the CSV file into a pandas DataFrame
-    df = pd.read_csv(csv_path)
-
-    # Select the specified columns
-    selected_columns = df[[column1, column2]]
-
-    # Calculate the correlation based on the specified method
-    if method == "pearson":
-        correlation = selected_columns.corr().iloc[0, 1]
-    elif method == "kendall":
-        correlation = selected_columns.corr(method="kendall").iloc[0, 1]
-    elif method == "spearman":
-        correlation = selected_columns.corr(method="spearman").iloc[0, 1]
-    elif callable(method):
-        correlation = selected_columns.corr(method=method).iloc[0, 1]
-    else:
-        raise ValueError("Invalid correlation method. Please choose 'pearson', 'kendall', 'spearman', or a callable.")
-
-    return correlation
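For reference (not part of the diff), a usage sketch for the removed calculate_correlation tool; the CSV path and column names are hypothetical:

    # Assuming calculate_correlation has been loaded (e.g. via the CaptainAgent tool library).
    r = calculate_correlation("sales.csv", "ad_spend", "revenue", method="spearman")
    print(f"Spearman correlation between ad_spend and revenue: {r:.3f}")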
autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py
DELETED
@@ -1,29 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-def calculate_skewness_and_kurtosis(csv_file: str, column_name: str) -> tuple:
-    """
-    Calculate the skewness and kurtosis of a specified column in a CSV file. The kurtosis is calculated using the Fisher definition.
-    The two metrics are computed using scipy.stats functions.
-
-    Args:
-        csv_file (str): The path to the CSV file.
-        column_name (str): The name of the column to calculate skewness and kurtosis for.
-
-    Returns:
-        tuple: (skewness, kurtosis)
-    """
-    import pandas as pd
-    from scipy.stats import kurtosis, skew
-
-    # Read the CSV file into a pandas DataFrame
-    df = pd.read_csv(csv_file)
-
-    # Extract the specified column
-    column = df[column_name]
-
-    # Calculate the skewness and kurtosis
-    skewness = skew(column)
-    kurt = kurtosis(column)
-
-    return skewness, kurt
autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py
DELETED
@@ -1,29 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-def detect_outlier_iqr(csv_file: str, column_name: str):
-    """
-    Detect outliers in a specified column of a CSV file using the IQR method.
-
-    Args:
-        csv_file (str): The path to the CSV file.
-        column_name (str): The name of the column to detect outliers in.
-
-    Returns:
-        list: A list of row indices that correspond to the outliers.
-    """
-    import pandas as pd
-
-    # Read the CSV file into a pandas DataFrame
-    df = pd.read_csv(csv_file)
-
-    # Calculate the quartiles and IQR for the specified column
-    q1 = df[column_name].quantile(0.25)
-    q3 = df[column_name].quantile(0.75)
-    iqr = q3 - q1
-
-    # Find the outliers based on the defined criteria
-    outliers = df[(df[column_name] < q1 - 1.5 * iqr) | (df[column_name] > q3 + 1.5 * iqr)]
-
-    # Return the row indices of the outliers
-    return outliers.index.tolist()
autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py
DELETED
@@ -1,29 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-def detect_outlier_zscore(csv_file, column_name, threshold=3):
-    """
-    Detect outliers in a CSV file based on a specified column. The outliers are determined by calculating the z-score of the data points in the column.
-
-    Args:
-        csv_file (str): The path to the CSV file.
-        column_name (str): The name of the column to calculate z-scores for.
-        threshold (float, optional): The threshold value for determining outliers. By default set to 3.
-
-    Returns:
-        list: A list of row indices where the z-score is above the threshold.
-    """
-    import numpy as np
-    import pandas as pd
-
-    # Read the CSV file into a pandas DataFrame
-    df = pd.read_csv(csv_file)
-
-    # Calculate the z-score for the specified column
-    z_scores = np.abs((df[column_name] - df[column_name].mean()) / df[column_name].std())
-
-    # Find the row indices where the z-score is above the threshold
-    outlier_indices = np.where(z_scores > threshold)[0]
-
-    # Return the row indices of the outliers
-    return outlier_indices
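A brief usage sketch (not part of the diff) for the two removed outlier-detection tools above; the file and column names are hypothetical:

    # Assuming both functions have been loaded (e.g. via the CaptainAgent tool library).
    iqr_rows = detect_outlier_iqr("measurements.csv", "temperature")                   # list of row indices
    z_rows = detect_outlier_zscore("measurements.csv", "temperature", threshold=3.0)   # numpy array of row indices
    print("IQR outliers:", iqr_rows)
    print("z-score outliers:", list(z_rows))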
autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py
DELETED
@@ -1,22 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-def explore_csv(file_path, num_lines=5):
-    """
-    Reads a CSV file and prints the column names, shape, data types, and the first few lines of data.
-
-    Args:
-        file_path (str): The path to the CSV file.
-        num_lines (int, optional): The number of lines to print. Defaults to 5.
-    """
-    import pandas as pd
-
-    df = pd.read_csv(file_path)
-    header = df.columns
-    print("Columns:")
-    print(", ".join(header))
-    print("Shape:", df.shape)
-    print("Data Types:")
-    print(df.dtypes)
-    print("First", num_lines, "lines:")
-    print(df.head(num_lines))
autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py
DELETED
@@ -1,31 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-from autogen.coding.func_with_reqs import with_requirements
-
-
-@with_requirements(["pandas", "scipy"])
-def shapiro_wilk_test(csv_file, column_name):
-    """
-    Perform the Shapiro-Wilk test on a specified column of a CSV file.
-
-    Args:
-        csv_file (str): The path to the CSV file.
-        column_name (str): The name of the column to perform the test on.
-
-    Returns:
-        float: The p-value resulting from the Shapiro-Wilk test.
-    """
-    import pandas as pd
-    from scipy.stats import shapiro
-
-    # Read the CSV file into a pandas DataFrame
-    df = pd.read_csv(csv_file)
-
-    # Extract the specified column as a numpy array
-    column_data = df[column_name].values
-
-    # Perform the Shapiro-Wilk test
-    _, p_value = shapiro(column_data)
-
-    return p_value
autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py
DELETED
@@ -1,26 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-import arxiv
-
-from autogen.coding.func_with_reqs import with_requirements
-
-
-@with_requirements(["arxiv"], ["arxiv"])
-def arxiv_download(id_list: list, download_dir="./"):
-    """
-    Downloads PDF files from ArXiv based on a list of arxiv paper IDs.
-
-    Args:
-        id_list (list): A list of paper IDs to download. e.g. [2302.00006v1]
-        download_dir (str, optional): The directory to save the downloaded PDF files. Defaults to './'.
-
-    Returns:
-        list: A list of paths to the downloaded PDF files.
-    """
-    paths = []
-    for paper in arxiv.Client().results(arxiv.Search(id_list=id_list)):
-        path = paper.download_pdf(download_dir, filename=paper.get_short_id() + ".pdf")
-        paths.append(path)
-        print("Paper id:", paper.get_short_id(), "Downloaded to:", path)
-    return paths
autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py
DELETED
@@ -1,55 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-import arxiv
-
-from autogen.coding.func_with_reqs import with_requirements
-
-
-@with_requirements(["arxiv"], ["arxiv"])
-def arxiv_search(query, max_results=10, sortby="relevance"):
-    """
-    Search for articles on arXiv based on the given query.
-
-    Args:
-        query (str): The search query.
-        max_results (int, optional): The maximum number of results to retrieve. Defaults to 10.
-        sortby (str, optional): The sorting criterion for the search results. Can be 'relevance' or 'submittedDate'. Defaults to 'relevance'.
-
-    Returns:
-        list: A list of dictionaries containing information about the search results. Each dictionary contains the following keys:
-            - 'title': The title of the article.
-            - 'authors': The authors of the article.
-            - 'summary': The summary of the article.
-            - 'entry_id': The entry ID of the article.
-            - 'doi': The DOI of the article (If applicable).
-            - 'published': The publication date of the article in the format 'Y-M'.
-    """
-
-    def get_author(r):
-        return ", ".join(a.name for a in r.authors)
-
-    criterion = {"relevance": arxiv.SortCriterion.Relevance, "submittedDate": arxiv.SortCriterion.SubmittedDate}[sortby]
-
-    client = arxiv.Client()
-    search = arxiv.Search(query=query, max_results=max_results, sort_by=criterion)
-    res = []
-    results = client.results(search)
-    for r in results:
-        print("Entry id:", r.entry_id)
-        print("Title:", r.title)
-        print("Authors:", get_author(r))
-        print("DOI:", r.doi)
-        print("Published:", r.published.strftime("%Y-%m"))
-        # print("Summary:", r.summary)
-        res.append(
-            {
-                "title": r.title,
-                "authors": get_author(r),
-                "summary": r.summary,
-                "entry_id": r.entry_id,
-                "doi": r.doi,
-                "published": r.published.strftime("%Y-%m"),
-            }
-        )
-    return res
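For reference (not part of the diff), a sketch of the two removed arXiv tools used together; the query is illustrative:

    # Assuming both functions have been loaded (e.g. via the CaptainAgent tool library).
    results = arxiv_search("multi-agent LLM frameworks", max_results=3, sortby="submittedDate")
    short_ids = [r["entry_id"].rsplit("/", 1)[-1] for r in results]  # e.g. "2308.08155v2"
    pdf_paths = arxiv_download(short_ids, download_dir="./")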
autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py
DELETED
@@ -1,54 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-import os
-
-from autogen.coding.func_with_reqs import with_requirements
-
-
-@with_requirements(["PyMuPDF"], ["os"])
-def extract_pdf_image(pdf_path: str, output_dir: str, page_number=None):
-    """
-    Extracts images from a PDF file and saves them to the specified output directory.
-
-    Args:
-        pdf_path (str): The path to the PDF file.
-        output_dir (str): The directory to save the extracted images.
-        page_number (int, optional): The page number to extract images from. If not provided, extract images from all pages.
-    """
-    import fitz  # PyMuPDF library
-
-    # Open the PDF file
-    doc = fitz.open(pdf_path)
-
-    # Create the output directory if it doesn't exist
-    os.makedirs(output_dir, exist_ok=True)
-
-    # Extract images from the PDF file
-    images = []
-    if page_number is not None:
-        page = doc[page_number - 1]  # Adjust page number to 0-based index
-        for img in page.get_images():
-            xref = img[0]
-            base_image = doc.extract_image(xref)
-            image_bytes = base_image["image"]
-            images.append(image_bytes)
-    else:
-        for page in doc:
-            for img in page.get_images():
-                xref = img[0]
-                base_image = doc.extract_image(xref)
-                image_bytes = base_image["image"]
-                images.append(image_bytes)
-
-    # Save the extracted images
-    for i, image_bytes in enumerate(images):
-        image_path = os.path.join(output_dir, f"image_{i}.png")
-        with open(image_path, "wb") as f:
-            f.write(image_bytes)
-
-    # Print the total number of images saved
-    print(f"Saved a total of {len(images)} images")
-
-    # Close the PDF file
-    doc.close()
autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py
DELETED
@@ -1,39 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-from autogen.coding.func_with_reqs import with_requirements
-
-
-@with_requirements(["PyMuPDF"])
-def extract_pdf_text(pdf_path, page_number=None):
-    """
-    Extracts text from a specified page or the entire PDF file.
-
-    Args:
-        pdf_path (str): The path to the PDF file.
-        page_number (int, optional): The page number to extract (starting from 0). If not provided,
-            the function will extract text from the entire PDF file.
-
-    Returns:
-        str: The extracted text.
-    """
-    import fitz
-
-    # Open the PDF file
-    doc = fitz.open(pdf_path)
-
-    # Extract text from the entire PDF file or a specific page
-    text = ""
-    if page_number is None:
-        # Extract content from the entire PDF file
-        for page in doc:
-            text += page.get_text()
-    else:
-        # Extract content from a specific page
-        page = doc[page_number]
-        text = page.get_text()
-
-    # Close the PDF file
-    doc.close()
-
-    return text
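A usage sketch (not part of the diff) for the two removed PDF helpers; note that extract_pdf_text counts pages from 0 while extract_pdf_image counts from 1, as the code above shows. The file paths are hypothetical:

    # Assuming both functions have been loaded (e.g. via the CaptainAgent tool library).
    first_page_text = extract_pdf_text("report.pdf", page_number=0)          # 0-based page index
    extract_pdf_image("report.pdf", output_dir="./figures", page_number=1)   # 1-based page number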
autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py
DELETED
@@ -1,22 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-def get_wikipedia_text(title):
-    """
-    Retrieves the text content of a Wikipedia page. It does not support tables and other complex formatting.
-
-    Args:
-        title (str): The title of the Wikipedia page.
-
-    Returns:
-        str or None: The text content of the Wikipedia page if it exists, None otherwise.
-    """
-    import wikipediaapi
-
-    wiki_wiki = wikipediaapi.Wikipedia("Mozilla/5.0 (merlin@example.com)", "en")
-    page = wiki_wiki.page(title)
-
-    if page.exists():
-        return page.text
-    else:
-        return None
autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py
DELETED
@@ -1,35 +0,0 @@
-# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
-#
-# SPDX-License-Identifier: Apache-2.0
-# alternative api: https://rapidapi.com/omarmhaimdat/api/youtube-v2
-
-
-def get_youtube_caption(videoId):
-    """
-    Retrieves the captions for a YouTube video.
-
-    Args:
-        videoId (str): The ID of the YouTube video.
-
-    Returns:
-        str: The captions of the YouTube video in text format.
-
-    Raises:
-        KeyError: If the RAPID_API_KEY environment variable is not set.
-    """
-    import os
-
-    import requests
-
-    RAPID_API_KEY = os.environ["RAPID_API_KEY"]
-    video_url = f"https://www.youtube.com/watch?v={videoId}"
-    url = "https://youtube-transcript3.p.rapidapi.com/api/transcript-with-url"
-
-    querystring = {"url": video_url, "lang": "en", "flat_text": "true"}
-
-    headers = {"X-RapidAPI-Key": RAPID_API_KEY, "X-RapidAPI-Host": "youtube-transcript3.p.rapidapi.com"}
-
-    response = requests.get(url, headers=headers, params=querystring)
-    response = response.json()
-    print(response)
-    return response["transcript"]
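Finally, a usage sketch (not part of the diff) for the removed get_youtube_caption helper, which requires a RapidAPI key in the environment; the key value and video ID below are placeholders:

    import os

    # Assuming get_youtube_caption has been loaded (e.g. via the CaptainAgent tool library).
    os.environ["RAPID_API_KEY"] = "YOUR_RAPIDAPI_KEY"   # required by the helper
    captions = get_youtube_caption("dQw4w9WgXcQ")       # placeholder video ID
    print(captions[:200])                               # first 200 characters of the transcript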