camel-ai 0.1.5__py3-none-any.whl → 0.1.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of camel-ai has been flagged as potentially problematic.
- camel/agents/__init__.py +2 -0
- camel/agents/chat_agent.py +217 -36
- camel/agents/deductive_reasoner_agent.py +86 -31
- camel/agents/knowledge_graph_agent.py +41 -18
- camel/agents/role_assignment_agent.py +4 -1
- camel/agents/search_agent.py +122 -0
- camel/bots/__init__.py +20 -0
- camel/bots/discord_bot.py +103 -0
- camel/bots/telegram_bot.py +84 -0
- camel/configs/__init__.py +3 -0
- camel/configs/anthropic_config.py +1 -1
- camel/configs/litellm_config.py +113 -0
- camel/embeddings/__init__.py +2 -0
- camel/embeddings/openai_embedding.py +2 -2
- camel/embeddings/sentence_transformers_embeddings.py +6 -5
- camel/embeddings/vlm_embedding.py +146 -0
- camel/functions/__init__.py +9 -0
- camel/functions/open_api_function.py +150 -29
- camel/functions/open_api_specs/biztoc/__init__.py +13 -0
- camel/functions/open_api_specs/biztoc/ai-plugin.json +34 -0
- camel/functions/open_api_specs/biztoc/openapi.yaml +21 -0
- camel/functions/open_api_specs/create_qr_code/__init__.py +13 -0
- camel/functions/open_api_specs/create_qr_code/openapi.yaml +44 -0
- camel/functions/open_api_specs/nasa_apod/__init__.py +13 -0
- camel/functions/open_api_specs/nasa_apod/openapi.yaml +72 -0
- camel/functions/open_api_specs/outschool/__init__.py +13 -0
- camel/functions/open_api_specs/outschool/ai-plugin.json +34 -0
- camel/functions/open_api_specs/outschool/openapi.yaml +1 -0
- camel/functions/open_api_specs/outschool/paths/__init__.py +14 -0
- camel/functions/open_api_specs/outschool/paths/get_classes.py +29 -0
- camel/functions/open_api_specs/outschool/paths/search_teachers.py +29 -0
- camel/functions/open_api_specs/security_config.py +21 -0
- camel/functions/open_api_specs/web_scraper/__init__.py +13 -0
- camel/functions/open_api_specs/web_scraper/ai-plugin.json +34 -0
- camel/functions/open_api_specs/web_scraper/openapi.yaml +71 -0
- camel/functions/open_api_specs/web_scraper/paths/__init__.py +13 -0
- camel/functions/open_api_specs/web_scraper/paths/scraper.py +29 -0
- camel/functions/openai_function.py +3 -1
- camel/functions/search_functions.py +104 -171
- camel/functions/slack_functions.py +2 -1
- camel/human.py +3 -1
- camel/loaders/base_io.py +3 -1
- camel/loaders/unstructured_io.py +16 -22
- camel/messages/base.py +135 -46
- camel/models/__init__.py +4 -0
- camel/models/anthropic_model.py +20 -14
- camel/models/base_model.py +2 -0
- camel/models/litellm_model.py +112 -0
- camel/models/model_factory.py +8 -1
- camel/models/open_source_model.py +1 -0
- camel/models/openai_model.py +6 -2
- camel/models/zhipuai_model.py +125 -0
- camel/prompts/__init__.py +2 -0
- camel/prompts/base.py +2 -1
- camel/prompts/descripte_video_prompt.py +33 -0
- camel/prompts/task_prompt_template.py +9 -3
- camel/retrievers/auto_retriever.py +20 -11
- camel/retrievers/base.py +4 -2
- camel/retrievers/bm25_retriever.py +2 -1
- camel/retrievers/cohere_rerank_retriever.py +2 -1
- camel/retrievers/vector_retriever.py +10 -4
- camel/societies/babyagi_playing.py +2 -1
- camel/societies/role_playing.py +2 -1
- camel/storages/graph_storages/base.py +1 -0
- camel/storages/graph_storages/neo4j_graph.py +5 -3
- camel/storages/vectordb_storages/base.py +2 -1
- camel/storages/vectordb_storages/milvus.py +5 -2
- camel/toolkits/github_toolkit.py +120 -26
- camel/types/__init__.py +3 -2
- camel/types/enums.py +25 -1
- camel/utils/__init__.py +11 -2
- camel/utils/commons.py +74 -4
- camel/utils/constants.py +26 -0
- camel/utils/token_counting.py +58 -5
- {camel_ai-0.1.5.dist-info → camel_ai-0.1.5.2.dist-info}/METADATA +29 -13
- camel_ai-0.1.5.2.dist-info/RECORD +148 -0
- camel_ai-0.1.5.dist-info/RECORD +0 -119
- {camel_ai-0.1.5.dist-info → camel_ai-0.1.5.2.dist-info}/WHEEL +0 -0
camel/functions/open_api_specs/outschool/paths/search_teachers.py
ADDED
@@ -0,0 +1,29 @@
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+"""Search for teachers on Outschool."""
+
+from typing import Any, Dict
+
+import requests
+
+
+def call_api(input_json: Dict[str, Any]) -> Dict[str, Any]:
+    response = requests.get(
+        "https://chatgpt-plugin.outschool.com/api/teachers", params=input_json
+    )
+
+    if response.status_code == 200:
+        return response.json()
+    else:
+        return {"status_code": response.status_code, "text": response.text}
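For illustration, a minimal usage sketch of this new helper. The query parameters shown are hypothetical; the parameters the `/api/teachers` endpoint actually accepts are defined in the accompanying Outschool OpenAPI spec.

    from camel.functions.open_api_specs.outschool.paths.search_teachers import (
        call_api,
    )

    # Hypothetical query parameters; consult the Outschool OpenAPI spec for
    # the fields the endpoint actually accepts.
    result = call_api({"name": "algebra", "limit": 5})
    print(result)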
camel/functions/open_api_specs/security_config.py
ADDED
@@ -0,0 +1,21 @@
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+from camel.types import OpenAPIName
+
+openapi_security_config = {
+    OpenAPIName.NASA_APOD.value: {
+        "api_key": "NASA_API_KEY",
+        "get_api_key_url": "https://api.nasa.gov/",
+    },
+}
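A small consumption sketch: the mapping keys a tool name to the environment variable holding its API key, so callers can verify the key is set before invoking the tool. The check itself is illustrative, not part of the package.

    import os

    from camel.functions.open_api_specs.security_config import (
        openapi_security_config,
    )
    from camel.types import OpenAPIName

    # Illustrative pre-flight check before invoking the NASA APOD tool.
    entry = openapi_security_config[OpenAPIName.NASA_APOD.value]
    if not os.environ.get(entry["api_key"]):
        print(f"Set {entry['api_key']} (see {entry['get_api_key_url']})")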
camel/functions/open_api_specs/web_scraper/__init__.py
ADDED
@@ -0,0 +1,13 @@
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
camel/functions/open_api_specs/web_scraper/ai-plugin.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "id": "plugin-0609b24f-5c80-4864-af90-c7c570d65375",
+  "domain": "scraper.gafo.tech",
+  "namespace": "web_scraper",
+  "status": "approved",
+  "manifest": {
+    "schema_version": "v1",
+    "name_for_model": "web_scraper",
+    "name_for_human": "Scraper",
+    "description_for_model": "Scrape content from webpages by providing a URL.",
+    "description_for_human": "Scrape content from webpages by providing a URL.",
+    "auth": {
+      "type": "none"
+    },
+    "api": {
+      "type": "openapi",
+      "url": "https://scraper.gafo.tech/openapi.yaml"
+    },
+    "logo_url": "https://scraper.gafo.tech/logo.png",
+    "contact_email": "gafotech1@gmail.com",
+    "legal_info_url": "https://scraper.gafo.tech/legal"
+  },
+  "oauth_client_id": null,
+  "user_settings": {
+    "is_installed": false,
+    "is_authenticated": true
+  },
+  "categories": [
+    {
+      "id": "newly_added",
+      "title": "New"
+    }
+  ]
+}
camel/functions/open_api_specs/web_scraper/openapi.yaml
ADDED
@@ -0,0 +1,71 @@
+openapi: 3.0.1
+info:
+  title: Scraper
+  description: Scrape content from webpages by providing a URL.
+  version: "v1"
+servers:
+  - url: https://scraper.gafo.tech
+paths:
+  /scrape:
+    post:
+      operationId: scrape
+      summary: Scrape content from a webpage
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              type: object
+              properties:
+                url:
+                  type: string
+                  format: uri
+                  example: https://example.com
+                type:
+                  type: string
+                  enum: [text, links, images]
+                  default: text
+                  example: text
+              required:
+                - url
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  text:
+                    type: string
+                    description: The text content of the webpage. Returned when type is text or not provided.
+                  links:
+                    type: array
+                    items:
+                      type: object
+                    description: The array of link objects with all attributes from the webpage. Returned when type is links.
+                  images:
+                    type: array
+                    items:
+                      type: object
+                    description: The array of image objects with all attributes from the webpage. Returned when type is images.
+        "400":
+          description: Bad Request
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: The error message.
+        "500":
+          description: Internal Server Error
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    description: The error message.
camel/functions/open_api_specs/web_scraper/paths/__init__.py
ADDED
@@ -0,0 +1,13 @@
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
camel/functions/open_api_specs/web_scraper/paths/scraper.py
ADDED
@@ -0,0 +1,29 @@
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+# Licensed under the Apache License, Version 2.0 (the “License”);
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an “AS IS” BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
+"""Scrape data from a website using the Scraper API."""
+
+from typing import Any, Dict
+
+import requests
+
+
+def call_api(input_json: Dict[str, Any]) -> Dict[str, Any]:
+    response = requests.post(
+        "https://scraper.gafo.tech/scrape", json=input_json
+    )
+
+    if response.status_code == 200:
+        return response.json()
+    else:
+        return {"status_code": response.status_code, "text": response.text}
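A usage sketch of this wrapper; on a non-200 response it returns the status/text fallback dict shown above, and the request body follows the OpenAPI spec earlier in this diff.

    from camel.functions.open_api_specs.web_scraper.paths.scraper import (
        call_api,
    )

    # "url" is required; "type" defaults to "text" per the OpenAPI schema.
    result = call_api({"url": "https://example.com", "type": "text"})
    if "status_code" in result:
        print("Request failed:", result["status_code"])
    else:
        print(result.get("text", result))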
camel/functions/openai_function.py
CHANGED
@@ -173,7 +173,9 @@ class OpenAIFunction:
         )

     @staticmethod
-    def validate_openai_tool_schema(openai_tool_schema: Dict[str, Any]) -> None:
+    def validate_openai_tool_schema(
+        openai_tool_schema: Dict[str, Any],
+    ) -> None:
         r"""Validates the OpenAI tool schema against
         :obj:`ToolAssistantToolsFunction`.
         This function checks if the provided :obj:`openai_tool_schema` adheres
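For context, the dictionary this validator receives follows the standard OpenAI tool-schema layout, along the lines of this illustrative example:

    # Illustrative OpenAI tool schema of the shape the validator checks.
    openai_tool_schema = {
        "type": "function",
        "function": {
            "name": "search_wiki",
            "description": "Search an entity in Wikipedia.",
            "parameters": {
                "type": "object",
                "properties": {"entity": {"type": "string"}},
                "required": ["entity"],
            },
        },
    }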
camel/functions/search_functions.py
CHANGED
@@ -14,15 +14,12 @@
 import os
 from typing import Any, Dict, List

-from camel.agents import ChatAgent
 from camel.functions.openai_function import OpenAIFunction
-from camel.messages import BaseMessage
-from camel.prompts import TextPrompt


 def search_wiki(entity: str) -> str:
-    r"""Search the entity in WikiPedia and return the summary of the
-        required page, containing factual information about the given entity.
+    r"""Search the entity in WikiPedia and return the summary of the required
+    page, containing factual information about the given entity.

     Args:
         entity (str): The entity to be searched.
@@ -59,11 +56,101 @@ def search_wiki(entity: str) -> str:
     return result


-def search_google(query: str) -> List[Dict[str, Any]]:
+def search_duckduckgo(
+    query: str, source: str = "text", max_results: int = 10
+) -> List[Dict[str, Any]]:
+    r"""Use DuckDuckGo search engine to search information for the given query.
+
+    This function queries the DuckDuckGo API for related topics to the given
+    search term. The results are formatted into a list of dictionaries, each
+    representing a search result.
+
+    Args:
+        query (str): The query to be searched.
+        source (str): The type of information to query (e.g., "text",
+            "images", "videos"). Defaults to "text".
+        max_results (int): Max number of results, defaults to `10`.
+
+    Returns:
+        List[Dict[str, Any]]: A list of dictionaries where each dictionary
+            represents a search result.
+    """
+    from duckduckgo_search import DDGS
+    from requests.exceptions import RequestException
+
+    ddgs = DDGS()
+    responses: List[Dict[str, Any]] = []
+
+    if source == "text":
+        try:
+            results = ddgs.text(keywords=query, max_results=max_results)
+        except RequestException as e:
+            # Handle specific exceptions or general request exceptions
+            responses.append({"error": f"duckduckgo search failed.{e}"})
+
+        # Iterate over results found
+        for i, result in enumerate(results, start=1):
+            # Creating a response object with a similar structure
+            response = {
+                "result_id": i,
+                "title": result["title"],
+                "description": result["body"],
+                "url": result["href"],
+            }
+            responses.append(response)
+
+    elif source == "images":
+        try:
+            results = ddgs.images(keywords=query, max_results=max_results)
+        except RequestException as e:
+            # Handle specific exceptions or general request exceptions
+            responses.append({"error": f"duckduckgo search failed.{e}"})
+
+        # Iterate over results found
+        for i, result in enumerate(results, start=1):
+            # Creating a response object with a similar structure
+            response = {
+                "result_id": i,
+                "title": result["title"],
+                "image": result["image"],
+                "url": result["url"],
+                "source": result["source"],
+            }
+            responses.append(response)
+
+    elif source == "videos":
+        try:
+            results = ddgs.videos(keywords=query, max_results=max_results)
+        except RequestException as e:
+            # Handle specific exceptions or general request exceptions
+            responses.append({"error": f"duckduckgo search failed.{e}"})
+
+        # Iterate over results found
+        for i, result in enumerate(results, start=1):
+            # Creating a response object with a similar structure
+            response = {
+                "result_id": i,
+                "title": result["title"],
+                "description": result["description"],
+                "embed_url": result["embed_url"],
+                "publisher": result["publisher"],
+                "duration": result["duration"],
+                "published": result["published"],
+            }
+            responses.append(response)
+
+    # If no answer found, return an empty list
+    return responses
+
+
+def search_google(
+    query: str, num_result_pages: int = 10
+) -> List[Dict[str, Any]]:
     r"""Use Google search engine to search information for the given query.

     Args:
         query (str): The query to be searched.
+        num_result_pages (int): The number of result pages to retrieve.

     Returns:
         List[Dict[str, Any]]: A list of dictionaries where each dictionary
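For reference, a quick usage sketch of the new function (requires the duckduckgo-search package; live results vary):

    from camel.functions.search_functions import search_duckduckgo

    results = search_duckduckgo("CAMEL-AI", source="text", max_results=3)
    for r in results:
        print(r["result_id"], r["title"], r["url"])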
@@ -82,7 +169,7 @@ def search_google(query: str) -> List[Dict[str, Any]]:
             'description': 'An organization focused on ensuring that
             artificial general intelligence benefits all of humanity.',
             'long_description': 'OpenAI is a non-profit artificial
-            intelligence research company. Our goal is to advance digital
+            intelligence research company. Our goal is to advance digital
             intelligence in the way that is most likely to benefit humanity
             as a whole',
             'url': 'https://www.openai.com'
@@ -148,172 +235,12 @@ def search_google(query: str) -> List[Dict[str, Any]]:
             responses.append({"error": "google search failed."})

     except requests.RequestException:
+        # Handle specific exceptions or general request exceptions
         responses.append({"error": "google search failed."})
-
+    # If no answer found, return an empty list
     return responses


-def text_extract_from_web(url: str) -> str:
-    r"""Get the text information from given url.
-
-    Args:
-        url (str): The website you want to search.
-
-    Returns:
-        str: All texts extract from the web.
-    """
-    import requests
-    from bs4 import BeautifulSoup
-
-    try:
-        # Request the target page
-        response_text = requests.get(url).text
-
-        # Parse the obtained page
-        soup = BeautifulSoup(response_text, features="html.parser")
-
-        for script in soup(["script", "style"]):
-            script.extract()
-
-        text = soup.get_text()
-        # Strip text
-        lines = (line.strip() for line in text.splitlines())
-        chunks = (
-            phrase.strip() for line in lines for phrase in line.split(" ")
-        )
-        text = ".".join(chunk for chunk in chunks if chunk)
-
-    except requests.RequestException:
-        text = f"can't access {url}"
-
-    return text
-
-
-# Split a text into smaller chunks of size n
-def create_chunks(text: str, n: int) -> List[str]:
-    r"""Returns successive n-sized chunks from provided text."
-
-    Args:
-        text (str): The text to be split.
-        n (int): The max length of a single chunk.
-
-    Returns:
-        List[str]: A list of split texts.
-    """
-
-    chunks = []
-    i = 0
-    while i < len(text):
-        # Find the nearest end of sentence within a range of 0.5 * n
-        # and 1.5 * n tokens
-        j = min(i + int(1.2 * n), len(text))
-        while j > i + int(0.8 * n):
-            # Decode the tokens and check for full stop or newline
-            chunk = text[i:j]
-            if chunk.endswith(".") or chunk.endswith("\n"):
-                break
-            j -= 1
-        # If no end of sentence found, use n tokens as the chunk size
-        if j == i + int(0.8 * n):
-            j = min(i + n, len(text))
-        chunks.append(text[i:j])
-        i = j
-    return chunks
-
-
-def prompt_single_step_agent(prompt: str) -> str:
-    """Prompt a single-step agent to summarize texts or answer a question."""
-
-    assistant_sys_msg = BaseMessage.make_assistant_message(
-        role_name="Assistant",
-        content="You are a helpful assistant.",
-    )
-    agent = ChatAgent(assistant_sys_msg)
-    agent.reset()
-
-    user_msg = BaseMessage.make_user_message(
-        role_name="User",
-        content=prompt,
-    )
-    assistant_response = agent.step(user_msg)
-    if assistant_response.msgs is not None:
-        return assistant_response.msg.content
-    return ""
-
-
-def summarize_text(text: str, query: str) -> str:
-    r"""Summarize the information from the text, base on the query if query is
-    given.
-
-    Args:
-        text (str): Text to summarize.
-        query (str): What information you want.
-
-    Returns:
-        str: Strings with information.
-    """
-    summary_prompt = TextPrompt(
-        '''Gather information from this text that relative to the question, but
-        do not directly answer the question.\nquestion: {query}\ntext '''
-    )
-    summary_prompt = summary_prompt.format(query=query)
-    # Max length of each chunk
-    max_len = 3000
-    results = ""
-    chunks = create_chunks(text, max_len)
-    # Summarize
-    for i, chunk in enumerate(chunks, start=1):
-        prompt = summary_prompt + str(i) + ": " + chunk
-        result = prompt_single_step_agent(prompt)
-        results += result + "\n"
-
-    # Final summarise
-    final_prompt = TextPrompt(
-        '''Here are some summarized texts which split from one text, Using the
-        information to answer the question: {query}.\n\nText: '''
-    )
-    final_prompt = final_prompt.format(query=query)
-    prompt = final_prompt + results
-
-    response = prompt_single_step_agent(prompt)
-
-    return response
-
-
-def search_google_and_summarize(query: str) -> str:
-    r"""Search webs for information. Given a query, this function will use
-    the Google search engine to search for related information from the
-    internet, and then return a summarized answer.
-
-    Args:
-        query (str): Question you want to be answered.
-
-    Returns:
-        str: Summarized information from webs.
-    """
-    # Google search will return a list of urls
-    responses = search_google(query)
-    for item in responses:
-        if "url" in item:
-            url = item.get("url")
-            # Extract text
-            text = text_extract_from_web(str(url))
-            # Using chatgpt summarise text
-            answer = summarize_text(text, query)
-
-            # Let chatgpt decide whether to continue search or not
-            prompt = TextPrompt(
-                '''Do you think the answer: {answer} can answer the query:
-                {query}. Use only 'yes' or 'no' to answer.'''
-            )
-            prompt = prompt.format(answer=answer, query=query)
-            reply = prompt_single_step_agent(prompt)
-            if "yes" in str(reply).lower():
-                return answer
-
-    return "Failed to find the answer from google search."
-
-
 def query_wolfram_alpha(query: str, is_detailed: bool) -> str:
     r"""Queries Wolfram|Alpha and returns the result. Wolfram|Alpha is an
     answer engine developed by Wolfram Research. It is offered as an online
@@ -332,7 +259,8 @@ def query_wolfram_alpha(query: str, is_detailed: bool) -> str:
         import wolframalpha
     except ImportError:
         raise ImportError(
-            "Please install `wolframalpha` first. You can install it by running `pip install wolframalpha`."
+            "Please install `wolframalpha` first. You can install it by"
+            " running `pip install wolframalpha`."
        )

    WOLFRAMALPHA_APP_ID = os.environ.get('WOLFRAMALPHA_APP_ID')
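A minimal usage sketch of query_wolfram_alpha, which as shown above requires the wolframalpha package and reads WOLFRAMALPHA_APP_ID from the environment:

    import os

    from camel.functions.search_functions import query_wolfram_alpha

    # Assumes a valid App ID obtained from the Wolfram developer portal.
    os.environ["WOLFRAMALPHA_APP_ID"] = "YOUR-APP-ID"
    print(query_wolfram_alpha("integrate x^2", is_detailed=False))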
@@ -370,5 +298,10 @@ def query_wolfram_alpha(query: str, is_detailed: bool) -> str:

 SEARCH_FUNCS: List[OpenAIFunction] = [
     OpenAIFunction(func)  # type: ignore[arg-type]
-    for func in [
+    for func in [
+        search_wiki,
+        search_google,
+        search_duckduckgo,
+        query_wolfram_alpha,
+    ]
 ]
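A sketch of how the exported list is typically consumed: each entry is an OpenAIFunction wrapper, so its generated tool schema can be inspected or handed to a tool-calling agent. The get_openai_tool_schema accessor is assumed from this package's OpenAIFunction class.

    from camel.functions.search_functions import SEARCH_FUNCS

    for fn in SEARCH_FUNCS:
        schema = fn.get_openai_tool_schema()  # assumed accessor
        print(schema["function"]["name"])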
camel/functions/slack_functions.py
CHANGED
@@ -65,7 +65,8 @@ def _login_slack(
     )
     if not slack_token:
         raise KeyError(
-            "SLACK_BOT_TOKEN or SLACK_USER_TOKEN environment variable not set."
+            "SLACK_BOT_TOKEN or SLACK_USER_TOKEN environment variable not "
+            "set."
         )

     client = WebClient(token=slack_token, ssl=ssl)
camel/human.py
CHANGED
@@ -112,7 +112,9 @@ class Human:

         return content

-    def reduce_step(self, messages: Sequence[BaseMessage]) -> ChatAgentResponse:
+    def reduce_step(
+        self, messages: Sequence[BaseMessage]
+    ) -> ChatAgentResponse:
         r"""Performs one step of the conversation by displaying options to the
         user, getting their input, and parsing their choice.

camel/loaders/base_io.py
CHANGED
@@ -65,7 +65,9 @@ class File(ABC):
         )

     def __str__(self) -> str:
-        return f"File(name={self.name}, id={self.id}, metadata={self.metadata})"
+        return (
+            f"File(name={self.name}, id={self.id}, metadata={self.metadata})"
+        )

     def copy(self) -> "File":
         r"""Create a deep copy of this File"""
camel/loaders/unstructured_io.py
CHANGED
@@ -131,30 +131,22 @@ class UnstructuredIO:
         self,
         input_path: str,
         **kwargs: Any,
-    ) ->
-        r"""Loads a file or a URL and parses its contents
+    ) -> List[Element]:
+        r"""Loads a file or a URL and parses its contents into elements.

         Args:
             input_path (str): Path to the file or URL to be parsed.
             **kwargs: Extra kwargs passed to the partition function.

         Returns:
-            List[
-                dict, list, etc., depending on the content. If return_str is
-                True, returns a tuple with a string representation of the
-                elements and the elements themselves.
+            List[Element]: List of elements after parsing the file or URL.

         Raises:
-            FileNotFoundError: If the file does not exist
-
+            FileNotFoundError: If the file does not exist at the path
+                specified.
             Exception: For any other issues during file or URL parsing.

         Notes:
-            By default we use the basic "unstructured" library,
-            if you are processing document types beyond the basics,
-            you can install the necessary extras like:
-            `pip install "unstructured[docx,pptx]"` or
-            `pip install "unstructured[all-docs]"`.
             Available document types:
             "csv", "doc", "docx", "epub", "image", "md", "msg", "odt",
             "org", "pdf", "ppt", "pptx", "rtf", "rst", "tsv", "xlsx".
@@ -185,7 +177,9 @@ class UnstructuredIO:

         # Check if the file exists
         if not os.path.exists(input_path):
-            raise FileNotFoundError(f"The file {input_path} was not found.")
+            raise FileNotFoundError(
+                f"The file {input_path} was not found."
+            )

         # Read the file
         try:
@@ -193,7 +187,9 @@ class UnstructuredIO:
             elements = partition(file=f, **kwargs)
             return elements
         except Exception as e:
-            raise Exception("Failed to parse the unstructured file.") from e
+            raise Exception(
+                "Failed to parse the unstructured file."
+            ) from e

     def clean_text_data(
         self,
@@ -433,9 +429,8 @@ class UnstructuredIO:
                 els, kw.get('metadata', [])
             ),
             "stage_for_baseplate": baseplate.stage_for_baseplate,
-            "stage_for_datasaur": lambda els,
-            **kw: datasaur.stage_for_datasaur(els, kw.get('entities', [])
-            ),
+            "stage_for_datasaur": lambda els,
+            **kw: datasaur.stage_for_datasaur(els, kw.get('entities', [])),
             "stage_for_label_box": lambda els,
             **kw: label_box.stage_for_label_box(els, **kw),
             "stage_for_label_studio": lambda els,
@@ -450,11 +445,11 @@ class UnstructuredIO:

     def chunk_elements(
         self, elements: List[Any], chunk_type: str, **kwargs
-    ) -> List[
+    ) -> List[Element]:
         r"""Chunks elements by titles.

         Args:
-            elements (List[
+            elements (List[Element]): List of Element objects to be chunked.
             chunk_type (str): Type chunk going to apply. Supported types:
                 'chunk_by_title'.
             **kwargs: Additional keyword arguments for chunking.
@@ -531,8 +526,7 @@ class UnstructuredIO:
         account_name: str,
         num_processes: int = 2,
     ) -> None:
-        """
-        Processes documents from an Azure storage container and stores
+        r"""Processes documents from an Azure storage container and stores
         structured outputs locally.

         Args: