ag2-0.4b1-py3-none-any.whl → ag2-0.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ag2 might be problematic.
- {ag2-0.4b1.dist-info → ag2-0.4.1.dist-info}/METADATA +13 -9
- ag2-0.4.1.dist-info/RECORD +158 -0
- autogen/agentchat/contrib/agent_builder.py +1 -0
- autogen/agentchat/contrib/agent_eval/agent_eval.py +107 -0
- autogen/agentchat/contrib/agent_eval/criterion.py +47 -0
- autogen/agentchat/contrib/agent_eval/critic_agent.py +47 -0
- autogen/agentchat/contrib/agent_eval/quantifier_agent.py +42 -0
- autogen/agentchat/contrib/agent_eval/subcritic_agent.py +48 -0
- autogen/agentchat/contrib/agent_eval/task.py +43 -0
- autogen/agentchat/contrib/captainagent/tools/__init__.py +0 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_correlation.py +41 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py +29 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py +29 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py +29 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py +22 -0
- autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py +31 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py +26 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py +55 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py +54 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py +39 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py +22 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py +35 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py +61 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py +62 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py +48 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py +34 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py +22 -0
- autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py +36 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py +22 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_day_of_the_week.py +19 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_fraction_sum.py +29 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_matrix_power.py +32 -0
- autogen/agentchat/contrib/captainagent/tools/math/calculate_reflected_point.py +17 -0
- autogen/agentchat/contrib/captainagent/tools/math/complex_numbers_product.py +26 -0
- autogen/agentchat/contrib/captainagent/tools/math/compute_currency_conversion.py +24 -0
- autogen/agentchat/contrib/captainagent/tools/math/count_distinct_permutations.py +28 -0
- autogen/agentchat/contrib/captainagent/tools/math/evaluate_expression.py +29 -0
- autogen/agentchat/contrib/captainagent/tools/math/find_continuity_point.py +35 -0
- autogen/agentchat/contrib/captainagent/tools/math/fraction_to_mixed_numbers.py +40 -0
- autogen/agentchat/contrib/captainagent/tools/math/modular_inverse_sum.py +23 -0
- autogen/agentchat/contrib/captainagent/tools/math/simplify_mixed_numbers.py +37 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_digit_factorials.py +16 -0
- autogen/agentchat/contrib/captainagent/tools/math/sum_of_primes_below.py +16 -0
- autogen/agentchat/contrib/captainagent/tools/requirements.txt +10 -0
- autogen/agentchat/contrib/captainagent/tools/tool_description.tsv +34 -0
- autogen/agentchat/contrib/captainagent.py +3 -0
- autogen/agentchat/contrib/graph_rag/document.py +6 -0
- autogen/agentchat/contrib/graph_rag/falkor_graph_query_engine.py +55 -20
- autogen/agentchat/contrib/graph_rag/falkor_graph_rag_capability.py +81 -0
- autogen/agentchat/contrib/graph_rag/graph_query_engine.py +7 -1
- autogen/agentchat/contrib/graph_rag/graph_rag_capability.py +12 -4
- autogen/agentchat/contrib/retrieve_user_proxy_agent.py +7 -3
- autogen/agentchat/contrib/swarm_agent.py +69 -20
- autogen/agentchat/contrib/tool_retriever.py +6 -0
- autogen/coding/base.py +2 -2
- autogen/coding/docker_commandline_code_executor.py +2 -2
- autogen/coding/factory.py +2 -2
- autogen/coding/func_with_reqs.py +2 -2
- autogen/coding/jupyter/base.py +2 -2
- autogen/coding/jupyter/docker_jupyter_server.py +2 -2
- autogen/coding/jupyter/embedded_ipython_code_executor.py +2 -2
- autogen/coding/jupyter/jupyter_client.py +2 -2
- autogen/coding/jupyter/jupyter_code_executor.py +2 -2
- autogen/coding/jupyter/local_jupyter_server.py +2 -2
- autogen/coding/local_commandline_code_executor.py +2 -2
- autogen/coding/markdown_code_extractor.py +2 -2
- autogen/coding/utils.py +2 -2
- autogen/oai/bedrock.py +6 -0
- autogen/oai/cerebras.py +6 -0
- autogen/oai/ollama.py +6 -0
- autogen/oai/openai_utils.py +1 -0
- autogen/token_count_utils.py +2 -0
- autogen/version.py +1 -1
- ag2-0.4b1.dist-info/RECORD +0 -115
- {ag2-0.4b1.dist-info → ag2-0.4.1.dist-info}/LICENSE +0 -0
- {ag2-0.4b1.dist-info → ag2-0.4.1.dist-info}/NOTICE.md +0 -0
- {ag2-0.4b1.dist-info → ag2-0.4.1.dist-info}/WHEEL +0 -0
- {ag2-0.4b1.dist-info → ag2-0.4.1.dist-info}/top_level.txt +0 -0
autogen/agentchat/contrib/captainagent/tools/data_analysis/calculate_skewness_and_kurtosis.py
ADDED
@@ -0,0 +1,29 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+def calculate_skewness_and_kurtosis(csv_file: str, column_name: str) -> tuple:
+    """
+    Calculate the skewness and kurtosis of a specified column in a CSV file. The kurtosis is calculated using the Fisher definition.
+    The two metrics are computed using scipy.stats functions.
+
+    Args:
+        csv_file (str): The path to the CSV file.
+        column_name (str): The name of the column to calculate skewness and kurtosis for.
+
+    Returns:
+        tuple: (skewness, kurtosis)
+    """
+    import pandas as pd
+    from scipy.stats import kurtosis, skew
+
+    # Read the CSV file into a pandas DataFrame
+    df = pd.read_csv(csv_file)
+
+    # Extract the specified column
+    column = df[column_name]
+
+    # Calculate the skewness and kurtosis
+    skewness = skew(column)
+    kurt = kurtosis(column)
+
+    return skewness, kurt
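A minimal usage sketch for the tool above; the file name and values are illustrative, not part of the package. With scipy's Fisher definition, a normal sample yields an excess kurtosis near zero:

import pandas as pd

# Hypothetical input: one large value skews the distribution to the right
pd.DataFrame({"value": [1.0, 2.0, 2.5, 3.0, 10.0]}).to_csv("sample.csv", index=False)
skewness, kurt = calculate_skewness_and_kurtosis("sample.csv", "value")
print(skewness, kurt)  # roughly 1.34 and 0.05 for this right-skewed sample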
autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_iqr.py
ADDED
@@ -0,0 +1,29 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+def detect_outlier_iqr(csv_file: str, column_name: str):
+    """
+    Detect outliers in a specified column of a CSV file using the IQR method.
+
+    Args:
+        csv_file (str): The path to the CSV file.
+        column_name (str): The name of the column to detect outliers in.
+
+    Returns:
+        list: A list of row indices that correspond to the outliers.
+    """
+    import pandas as pd
+
+    # Read the CSV file into a pandas DataFrame
+    df = pd.read_csv(csv_file)
+
+    # Calculate the quartiles and IQR for the specified column
+    q1 = df[column_name].quantile(0.25)
+    q3 = df[column_name].quantile(0.75)
+    iqr = q3 - q1
+
+    # Find the outliers based on the defined criteria
+    outliers = df[(df[column_name] < q1 - 1.5 * iqr) | (df[column_name] > q3 + 1.5 * iqr)]
+
+    # Return the row indices of the outliers
+    return outliers.index.tolist()
autogen/agentchat/contrib/captainagent/tools/data_analysis/detect_outlier_zscore.py
ADDED
@@ -0,0 +1,29 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+def detect_outlier_zscore(csv_file, column_name, threshold=3):
+    """
+    Detect outliers in a CSV file based on a specified column. The outliers are determined by calculating the z-score of the data points in the column.
+
+    Args:
+        csv_file (str): The path to the CSV file.
+        column_name (str): The name of the column to calculate z-scores for.
+        threshold (float, optional): The threshold value for determining outliers. By default set to 3.
+
+    Returns:
+        list: A list of row indices where the z-score is above the threshold.
+    """
+    import numpy as np
+    import pandas as pd
+
+    # Read the CSV file into a pandas DataFrame
+    df = pd.read_csv(csv_file)
+
+    # Calculate the z-score for the specified column
+    z_scores = np.abs((df[column_name] - df[column_name].mean()) / df[column_name].std())
+
+    # Find the row indices where the z-score is above the threshold
+    outlier_indices = np.where(z_scores > threshold)[0]
+
+    # Return the row indices of the outliers
+    return outlier_indices
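Note that the two detectors above report outliers differently: detect_outlier_iqr returns DataFrame index labels as a Python list, while detect_outlier_zscore returns the NumPy array produced by np.where, holding 0-based positional indices, despite its docstring saying "list". A comparison sketch (file and column names are illustrative):

iqr_outliers = detect_outlier_iqr("sample.csv", "value")              # list of index labels
z_outliers = detect_outlier_zscore("sample.csv", "value", threshold=2)  # numpy array of positions
print(iqr_outliers, z_outliers.tolist())  # .tolist() if a plain list is needed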
autogen/agentchat/contrib/captainagent/tools/data_analysis/explore_csv.py
ADDED
@@ -0,0 +1,22 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+def explore_csv(file_path, num_lines=5):
+    """
+    Reads a CSV file and prints the column names, shape, data types, and the first few lines of data.
+
+    Args:
+        file_path (str): The path to the CSV file.
+        num_lines (int, optional): The number of lines to print. Defaults to 5.
+    """
+    import pandas as pd
+
+    df = pd.read_csv(file_path)
+    header = df.columns
+    print("Columns:")
+    print(", ".join(header))
+    print("Shape:", df.shape)
+    print("Data Types:")
+    print(df.dtypes)
+    print("First", num_lines, "lines:")
+    print(df.head(num_lines))
autogen/agentchat/contrib/captainagent/tools/data_analysis/shapiro_wilk_test.py
ADDED
@@ -0,0 +1,31 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+from autogen.coding.func_with_reqs import with_requirements
+
+
+@with_requirements(["pandas", "scipy"])
+def shapiro_wilk_test(csv_file, column_name):
+    """
+    Perform the Shapiro-Wilk test on a specified column of a CSV file.
+
+    Args:
+        csv_file (str): The path to the CSV file.
+        column_name (str): The name of the column to perform the test on.
+
+    Returns:
+        float: The p-value resulting from the Shapiro-Wilk test.
+    """
+    import pandas as pd
+    from scipy.stats import shapiro
+
+    # Read the CSV file into a pandas DataFrame
+    df = pd.read_csv(csv_file)
+
+    # Extract the specified column as a numpy array
+    column_data = df[column_name].values
+
+    # Perform the Shapiro-Wilk test
+    _, p_value = shapiro(column_data)
+
+    return p_value
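The function above discards the W statistic and returns only the p-value. A short interpretation sketch (the 5% threshold is a common convention, not part of the package, and the file name is illustrative):

p_value = shapiro_wilk_test("sample.csv", "value")
if p_value > 0.05:
    print("Normality not rejected at the 5% level")
else:
    print("Significant departure from normality")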
autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_download.py
ADDED
@@ -0,0 +1,26 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+import arxiv
+
+from autogen.coding.func_with_reqs import with_requirements
+
+
+@with_requirements(["arxiv"], ["arxiv"])
+def arxiv_download(id_list: list, download_dir="./"):
+    """
+    Downloads PDF files from ArXiv based on a list of arxiv paper IDs.
+
+    Args:
+        id_list (list): A list of paper IDs to download. e.g. [2302.00006v1]
+        download_dir (str, optional): The directory to save the downloaded PDF files. Defaults to './'.
+
+    Returns:
+        list: A list of paths to the downloaded PDF files.
+    """
+    paths = []
+    for paper in arxiv.Client().results(arxiv.Search(id_list=id_list)):
+        path = paper.download_pdf(download_dir, filename=paper.get_short_id() + ".pdf")
+        paths.append(path)
+        print("Paper id:", paper.get_short_id(), "Downloaded to:", path)
+    return paths
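A usage sketch for arxiv_download; it needs the arxiv package and network access, and the paper ID below is the one from the docstring's own example:

paths = arxiv_download(["2302.00006v1"])
print(paths)  # e.g. ['./2302.00006v1.pdf']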
autogen/agentchat/contrib/captainagent/tools/information_retrieval/arxiv_search.py
ADDED
@@ -0,0 +1,55 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+import arxiv
+
+from autogen.coding.func_with_reqs import with_requirements
+
+
+@with_requirements(["arxiv"], ["arxiv"])
+def arxiv_search(query, max_results=10, sortby="relevance"):
+    """
+    Search for articles on arXiv based on the given query.
+
+    Args:
+        query (str): The search query.
+        max_results (int, optional): The maximum number of results to retrieve. Defaults to 10.
+        sortby (str, optional): The sorting criterion for the search results. Can be 'relevance' or 'submittedDate'. Defaults to 'relevance'.
+
+    Returns:
+        list: A list of dictionaries containing information about the search results. Each dictionary contains the following keys:
+        - 'title': The title of the article.
+        - 'authors': The authors of the article.
+        - 'summary': The summary of the article.
+        - 'entry_id': The entry ID of the article.
+        - 'doi': The DOI of the article (If applicable).
+        - 'published': The publication date of the article in the format 'Y-M'.
+    """
+
+    def get_author(r):
+        return ", ".join(a.name for a in r.authors)
+
+    criterion = {"relevance": arxiv.SortCriterion.Relevance, "submittedDate": arxiv.SortCriterion.SubmittedDate}[sortby]
+
+    client = arxiv.Client()
+    search = arxiv.Search(query=query, max_results=max_results, sort_by=criterion)
+    res = []
+    results = client.results(search)
+    for r in results:
+        print("Entry id:", r.entry_id)
+        print("Title:", r.title)
+        print("Authors:", get_author(r))
+        print("DOI:", r.doi)
+        print("Published:", r.published.strftime("%Y-%m"))
+        # print("Summary:", r.summary)
+        res.append(
+            {
+                "title": r.title,
+                "authors": get_author(r),
+                "summary": r.summary,
+                "entry_id": r.entry_id,
+                "doi": r.doi,
+                "published": r.published.strftime("%Y-%m"),
+            }
+        )
+    return res
autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_image.py
ADDED
@@ -0,0 +1,54 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+import os
+
+from autogen.coding.func_with_reqs import with_requirements
+
+
+@with_requirements(["PyMuPDF"], ["os"])
+def extract_pdf_image(pdf_path: str, output_dir: str, page_number=None):
+    """
+    Extracts images from a PDF file and saves them to the specified output directory.
+
+    Args:
+        pdf_path (str): The path to the PDF file.
+        output_dir (str): The directory to save the extracted images.
+        page_number (int, optional): The page number to extract images from. If not provided, extract images from all pages.
+    """
+    import fitz  # PyMuPDF library
+
+    # Open the PDF file
+    doc = fitz.open(pdf_path)
+
+    # Create the output directory if it doesn't exist
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Extract images from the PDF file
+    images = []
+    if page_number is not None:
+        page = doc[page_number - 1]  # Adjust page number to 0-based index
+        for img in page.get_images():
+            xref = img[0]
+            base_image = doc.extract_image(xref)
+            image_bytes = base_image["image"]
+            images.append(image_bytes)
+    else:
+        for page in doc:
+            for img in page.get_images():
+                xref = img[0]
+                base_image = doc.extract_image(xref)
+                image_bytes = base_image["image"]
+                images.append(image_bytes)
+
+    # Save the extracted images
+    for i, image_bytes in enumerate(images):
+        image_path = os.path.join(output_dir, f"image_{i}.png")
+        with open(image_path, "wb") as f:
+            f.write(image_bytes)
+
+    # Print the total number of images saved
+    print(f"Saved a total of {len(images)} images")
+
+    # Close the PDF file
+    doc.close()
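Two caveats worth noting for extract_pdf_image: page_number is treated as 1-based (the code subtracts 1 before indexing), and every image is written under a .png file name even though doc.extract_image also reports the original codec in its "ext" field, so the saved bytes may actually be JPEG data. A usage sketch (paths are illustrative):

extract_pdf_image("paper.pdf", "./images", page_number=1)  # first page only
extract_pdf_image("paper.pdf", "./images")                 # all pages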
autogen/agentchat/contrib/captainagent/tools/information_retrieval/extract_pdf_text.py
ADDED
@@ -0,0 +1,39 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+from autogen.coding.func_with_reqs import with_requirements
+
+
+@with_requirements(["PyMuPDF"])
+def extract_pdf_text(pdf_path, page_number=None):
+    """
+    Extracts text from a specified page or the entire PDF file.
+
+    Args:
+        pdf_path (str): The path to the PDF file.
+        page_number (int, optional): The page number to extract (starting from 0). If not provided,
+            the function will extract text from the entire PDF file.
+
+    Returns:
+        str: The extracted text.
+    """
+    import fitz
+
+    # Open the PDF file
+    doc = fitz.open(pdf_path)
+
+    # Extract text from the entire PDF file or a specific page
+    text = ""
+    if page_number is None:
+        # Extract content from the entire PDF file
+        for page in doc:
+            text += page.get_text()
+    else:
+        # Extract content from a specific page
+        page = doc[page_number]
+        text = page.get_text()
+
+    # Close the PDF file
+    doc.close()
+
+    return text
autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_wikipedia_text.py
ADDED
@@ -0,0 +1,22 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+def get_wikipedia_text(title):
+    """
+    Retrieves the text content of a Wikipedia page. It does not support tables and other complex formatting.
+
+    Args:
+        title (str): The title of the Wikipedia page.
+
+    Returns:
+        str or None: The text content of the Wikipedia page if it exists, None otherwise.
+    """
+    import wikipediaapi
+
+    wiki_wiki = wikipediaapi.Wikipedia("Mozilla/5.0 (merlin@example.com)", "en")
+    page = wiki_wiki.page(title)
+
+    if page.exists():
+        return page.text
+    else:
+        return None
autogen/agentchat/contrib/captainagent/tools/information_retrieval/get_youtube_caption.py
ADDED
@@ -0,0 +1,35 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+# alternative api: https://rapidapi.com/omarmhaimdat/api/youtube-v2
+
+
+def get_youtube_caption(videoId):
+    """
+    Retrieves the captions for a YouTube video.
+
+    Args:
+        videoId (str): The ID of the YouTube video.
+
+    Returns:
+        str: The captions of the YouTube video in text format.
+
+    Raises:
+        KeyError: If the RAPID_API_KEY environment variable is not set.
+    """
+    import os
+
+    import requests
+
+    RAPID_API_KEY = os.environ["RAPID_API_KEY"]
+    video_url = f"https://www.youtube.com/watch?v={videoId}"
+    url = "https://youtube-transcript3.p.rapidapi.com/api/transcript-with-url"
+
+    querystring = {"url": video_url, "lang": "en", "flat_text": "true"}
+
+    headers = {"X-RapidAPI-Key": RAPID_API_KEY, "X-RapidAPI-Host": "youtube-transcript3.p.rapidapi.com"}
+
+    response = requests.get(url, headers=headers, params=querystring)
+    response = response.json()
+    print(response)
+    return response["transcript"]
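get_youtube_caption reads RAPID_API_KEY from the environment (raising KeyError if unset) and calls a third-party RapidAPI transcript endpoint, so a subscription to that service is assumed. A sketch with a placeholder key and an illustrative video ID:

import os

os.environ["RAPID_API_KEY"] = "<your-rapidapi-key>"  # placeholder, not a real key
print(get_youtube_caption("dQw4w9WgXcQ")[:200])      # first 200 characters of the transcript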
autogen/agentchat/contrib/captainagent/tools/information_retrieval/image_qa.py
ADDED
@@ -0,0 +1,61 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+import os
+
+from PIL import Image
+
+from autogen.coding.func_with_reqs import with_requirements
+
+
+@with_requirements(["transformers", "torch"], ["transformers", "torch", "PIL", "os"])
+def image_qa(image, question, ckpt="Salesforce/blip-vqa-base"):
+    """
+    Perform question answering on an image using a pre-trained VQA model.
+
+    Args:
+        image (Union[str, Image.Image]): The image to perform question answering on. It can be either file path to the image or a PIL Image object.
+        question: The question to ask about the image.
+
+    Returns:
+        dict: The generated answer text.
+    """
+    import torch
+    from transformers import BlipForQuestionAnswering, BlipProcessor
+
+    def image_processing(img):
+        if isinstance(img, Image.Image):
+            return img.convert("RGB")
+        elif isinstance(img, str):
+            if os.path.exists(img):
+                return Image.open(img).convert("RGB")
+            else:
+                full_path = img
+                if os.path.exists(full_path):
+                    return Image.open(full_path).convert("RGB")
+                else:
+                    raise FileNotFoundError
+
+    def text_processing(file_path):
+        # Check the file extension
+        if file_path.endswith(".txt"):
+            with open(file_path, "r") as file:
+                content = file.read()
+        else:
+            # if the file is not .txt, then it is a string, directly return the string
+            return file_path
+        return content
+
+    image = image_processing(image)
+    question = text_processing(question)
+
+    processor = BlipProcessor.from_pretrained(ckpt)
+    model = BlipForQuestionAnswering.from_pretrained(ckpt, torch_dtype=torch.float16).to("cuda")
+
+    raw_image = image
+
+    inputs = processor(raw_image, question, return_tensors="pt").to("cuda", torch.float16)
+    out = model.generate(**inputs)
+    result_formatted = processor.decode(out[0], skip_special_tokens=True)
+
+    return result_formatted
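As written, image_qa moves both the model and its inputs to "cuda" with float16 weights, so it assumes an NVIDIA GPU is available; and despite the docstring's "dict", the decoded return value is a plain string. A usage sketch (the image path is illustrative):

answer = image_qa("photo.jpg", "How many people are in the picture?")
print(answer)  # short free-form answer string from the BLIP VQA model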
autogen/agentchat/contrib/captainagent/tools/information_retrieval/optical_character_recognition.py
ADDED
@@ -0,0 +1,62 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+import os
+
+from autogen.coding.func_with_reqs import with_requirements
+
+
+@with_requirements(["easyocr"], ["os"])
+def optical_character_recognition(image):
+    """
+    Perform optical character recognition (OCR) on the given image.
+
+    Args:
+        image (Union[str, Image.Image]): The image to perform OCR on. It can be either a file path or an Image object.
+
+    Returns:
+        str: The extracted text from the image.
+
+    Raises:
+        FileNotFoundError: If the image file path does not exist.
+    """
+    import io
+
+    import easyocr
+    from PIL import Image
+
+    def image_processing(img):
+        if isinstance(img, Image.Image):
+            return img.convert("RGB")
+        elif isinstance(img, str):
+            if os.path.exists(img):
+                return Image.open(img).convert("RGB")
+            else:
+                full_path = img
+                if os.path.exists(full_path):
+                    return Image.open(full_path).convert("RGB")
+                else:
+                    raise FileNotFoundError
+
+    reader = easyocr.Reader(["en"])  # Load the OCR model into memory
+
+    if isinstance(image, str):
+        # If image is a path, use it directly
+        if not os.path.exists(image):
+            raise FileNotFoundError
+        image_path_or_bytes = image
+    else:
+        # If image is an Image object, convert it to a bytes stream
+        buffer = io.BytesIO()
+        image = image_processing(image)  # Process the image if needed
+        image.save(buffer, format="JPEG")
+        buffer.seek(0)
+        image_path_or_bytes = buffer
+
+    # Read text from the image or image path
+    result = reader.readtext(image_path_or_bytes)
+
+    # Extract only the text from the result
+    result_text = [text for _, text, _ in result]
+
+    return ", ".join(result_text)
autogen/agentchat/contrib/captainagent/tools/information_retrieval/perform_web_search.py
ADDED
@@ -0,0 +1,48 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+def perform_web_search(query, count=10, offset=0):
+    """
+    Perform a web search using Bing API.
+
+    Args:
+        query (str): The search query.
+        count (int, optional): Number of search results to retrieve. Defaults to 10.
+        offset (int, optional): Offset of the first search result. Defaults to 0.
+
+    Returns:
+        The name, URL and snippet of each search result.
+    """
+    import os
+
+    import requests
+
+    # Get the Bing API key from the environment variable
+    bing_api_key = os.getenv("BING_API_KEY")
+
+    # Check if the API key is available
+    if not bing_api_key:
+        raise ValueError("Bing API key not found in environment variable")
+
+    # Set up the API request
+    url = "https://api.bing.microsoft.com/v7.0/search"
+    headers = {
+        "Ocp-Apim-Subscription-Key": bing_api_key,
+    }
+    params = {
+        "q": query,
+        "count": count,  # Number of search results to retrieve
+        "offset": offset,  # Offset of the first search result
+    }
+
+    # Send the API request
+    response = requests.get(url, headers=headers, params=params)
+    response.raise_for_status()
+
+    # Process the search results
+    search_results = response.json()
+    for index, result in enumerate(search_results["webPages"]["value"]):
+        print(f"Search Result {index+1}:")
+        print(result["name"])
+        print(result["url"])
+        print(result["snippet"])
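Despite its "Returns" section, perform_web_search only prints each result and implicitly returns None, so callers that need the data would have to capture it some other way. It also requires the BING_API_KEY environment variable. A sketch with a placeholder key:

import os

os.environ["BING_API_KEY"] = "<your-bing-key>"  # placeholder, not a real key
perform_web_search("autogen multi-agent framework", count=5)  # prints name, URL, snippet per result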
autogen/agentchat/contrib/captainagent/tools/information_retrieval/scrape_wikipedia_tables.py
ADDED
@@ -0,0 +1,34 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+def scrape_wikipedia_tables(url: str, header_keyword: str):
+    """
+    Scrapes Wikipedia tables based on a given URL and header keyword.
+
+    Args:
+        url: The URL of the Wikipedia page to scrape.
+        header_keyword: The keyword to search for in the headers of the page.
+
+    Returns:
+        list: A list of lists representing the scraped table data. Each inner list represents a row in the table,
+        with each element representing a cell value.
+    """
+    import requests
+    from bs4 import BeautifulSoup
+
+    response = requests.get(url)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.content, "html.parser")
+    headers = soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
+    data = []
+    for header in headers:
+        if header_keyword.lower() in header.text.lower():
+            table = header.find_next_sibling("table", class_="wikitable")
+            if table:
+                rows = table.find_all("tr")
+                for row in rows:
+                    cols = row.find_all(["th", "td"])
+                    cols = [ele.text.strip() for ele in cols]
+                    data.append([ele for ele in cols if ele])
+                break
+    return data
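One limitation of the scraper above: find_next_sibling only matches a wikitable that appears as a later sibling of the heading element itself, so a table nested inside an intermediate container is missed, and the break stops after the first header whose table is found. A usage sketch (the URL and keyword are illustrative, and results depend on the live page markup):

rows = scrape_wikipedia_tables(
    "https://en.wikipedia.org/wiki/List_of_sovereign_states",
    header_keyword="list of states",
)
print(rows[:2])  # header row plus the first data row, as lists of cell strings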
autogen/agentchat/contrib/captainagent/tools/information_retrieval/transcribe_audio_file.py
ADDED
@@ -0,0 +1,22 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+from autogen.coding.func_with_reqs import with_requirements
+
+
+@with_requirements(["openai-whisper"])
+def transcribe_audio_file(file_path):
+    """
+    Transcribes the audio file located at the given file path.
+
+    Args:
+        file_path (str): The path to the audio file.
+
+    Returns:
+        str: The transcribed text from the audio file.
+    """
+    import whisper
+
+    model = whisper.load_model("base")
+    result = model.transcribe(file_path)
+    return result["text"]
autogen/agentchat/contrib/captainagent/tools/information_retrieval/youtube_download.py
ADDED
@@ -0,0 +1,36 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+def youtube_download(url: str):
+    """
+    Downloads a YouTube video and returns the download link.
+
+    Args:
+        url: The URL of the YouTube video.
+
+    Returns:
+        str: The download link for the audio.
+    """
+    import os
+
+    import requests
+
+    endpoint = "https://youtube-mp3-downloader2.p.rapidapi.com/ytmp3/ytmp3/"
+
+    querystring = {"url": url}
+
+    headers = {
+        "X-RapidAPI-Key": os.environ.get("RAPIDAPI_KEY"),
+        "X-RapidAPI-Host": "youtube-mp3-downloader2.p.rapidapi.com",
+    }
+
+    response = requests.get(endpoint, headers=headers, params=querystring)
+    response = response.json()
+
+    if "link" in response:
+        return response["link"]
+    else:
+        print("Error: Unable to retrieve download link.")
+        print(response)
+        # or you can return an error message
+        # return "Error: Unable to retrieve download link."
autogen/agentchat/contrib/captainagent/tools/math/calculate_circle_area_from_diameter.py
ADDED
@@ -0,0 +1,22 @@
+# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
+#
+# SPDX-License-Identifier: Apache-2.0
+from autogen.coding.func_with_reqs import with_requirements
+
+
+@with_requirements(["sympy"])
+def calculate_circle_area_from_diameter(diameter):
+    """
+    Calculate the area of a circle given its diameter.
+
+    Args:
+        diameter (float): The diameter of the circle.
+
+    Returns:
+        float: The area of the circle.
+    """
+    from sympy import pi
+
+    radius = diameter / 2
+    area = pi * radius**2
+    return area
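Because the function above uses sympy's symbolic pi, it returns an exact expression rather than the float its docstring promises; converting is a one-liner:

area = calculate_circle_area_from_diameter(4)
print(area)         # 4.0*pi, a sympy expression
print(float(area))  # 12.566370614359172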