camel-ai 0.2.0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +326 -115
- camel/agents/knowledge_graph_agent.py +4 -6
- camel/bots/__init__.py +34 -0
- camel/bots/discord_app.py +138 -0
- camel/bots/slack/__init__.py +30 -0
- camel/bots/slack/models.py +158 -0
- camel/bots/slack/slack_app.py +255 -0
- camel/bots/telegram_bot.py +82 -0
- camel/configs/__init__.py +1 -2
- camel/configs/anthropic_config.py +2 -5
- camel/configs/base_config.py +6 -6
- camel/configs/gemini_config.py +1 -1
- camel/configs/groq_config.py +2 -3
- camel/configs/ollama_config.py +1 -2
- camel/configs/openai_config.py +2 -23
- camel/configs/samba_config.py +2 -2
- camel/configs/togetherai_config.py +1 -1
- camel/configs/vllm_config.py +1 -1
- camel/configs/zhipuai_config.py +2 -3
- camel/embeddings/openai_embedding.py +2 -2
- camel/loaders/__init__.py +2 -0
- camel/loaders/chunkr_reader.py +163 -0
- camel/loaders/firecrawl_reader.py +13 -45
- camel/loaders/unstructured_io.py +65 -29
- camel/messages/__init__.py +1 -0
- camel/messages/func_message.py +2 -2
- camel/models/__init__.py +2 -4
- camel/models/anthropic_model.py +32 -26
- camel/models/azure_openai_model.py +39 -36
- camel/models/base_model.py +31 -20
- camel/models/gemini_model.py +37 -29
- camel/models/groq_model.py +29 -23
- camel/models/litellm_model.py +44 -61
- camel/models/mistral_model.py +33 -30
- camel/models/model_factory.py +66 -76
- camel/models/nemotron_model.py +33 -23
- camel/models/ollama_model.py +42 -47
- camel/models/{openai_compatibility_model.py → openai_compatible_model.py} +36 -41
- camel/models/openai_model.py +48 -29
- camel/models/reka_model.py +30 -28
- camel/models/samba_model.py +82 -177
- camel/models/stub_model.py +2 -2
- camel/models/togetherai_model.py +37 -43
- camel/models/vllm_model.py +43 -50
- camel/models/zhipuai_model.py +33 -27
- camel/retrievers/auto_retriever.py +28 -10
- camel/retrievers/vector_retriever.py +72 -44
- camel/societies/babyagi_playing.py +6 -3
- camel/societies/role_playing.py +17 -3
- camel/storages/__init__.py +2 -0
- camel/storages/graph_storages/__init__.py +2 -0
- camel/storages/graph_storages/graph_element.py +3 -5
- camel/storages/graph_storages/nebula_graph.py +547 -0
- camel/storages/key_value_storages/json.py +6 -1
- camel/tasks/task.py +11 -4
- camel/tasks/task_prompt.py +4 -0
- camel/toolkits/__init__.py +20 -7
- camel/toolkits/arxiv_toolkit.py +155 -0
- camel/toolkits/ask_news_toolkit.py +653 -0
- camel/toolkits/base.py +2 -3
- camel/toolkits/code_execution.py +6 -7
- camel/toolkits/dalle_toolkit.py +6 -6
- camel/toolkits/{openai_function.py → function_tool.py} +34 -11
- camel/toolkits/github_toolkit.py +9 -10
- camel/toolkits/google_maps_toolkit.py +7 -7
- camel/toolkits/google_scholar_toolkit.py +146 -0
- camel/toolkits/linkedin_toolkit.py +7 -7
- camel/toolkits/math_toolkit.py +8 -8
- camel/toolkits/open_api_toolkit.py +5 -5
- camel/toolkits/reddit_toolkit.py +7 -7
- camel/toolkits/retrieval_toolkit.py +5 -5
- camel/toolkits/search_toolkit.py +9 -9
- camel/toolkits/slack_toolkit.py +11 -11
- camel/toolkits/twitter_toolkit.py +378 -452
- camel/toolkits/weather_toolkit.py +6 -6
- camel/toolkits/whatsapp_toolkit.py +177 -0
- camel/types/__init__.py +6 -1
- camel/types/enums.py +43 -85
- camel/types/openai_types.py +3 -0
- camel/types/unified_model_type.py +104 -0
- camel/utils/__init__.py +0 -2
- camel/utils/async_func.py +7 -7
- camel/utils/commons.py +40 -4
- camel/utils/token_counting.py +30 -212
- camel/workforce/__init__.py +6 -6
- camel/workforce/base.py +9 -5
- camel/workforce/prompts.py +179 -0
- camel/workforce/role_playing_worker.py +181 -0
- camel/workforce/{single_agent_node.py → single_agent_worker.py} +49 -23
- camel/workforce/task_channel.py +7 -8
- camel/workforce/utils.py +20 -50
- camel/workforce/{worker_node.py → worker.py} +15 -12
- camel/workforce/workforce.py +456 -19
- camel_ai-0.2.3.dist-info/LICENSE +201 -0
- {camel_ai-0.2.0.dist-info → camel_ai-0.2.3.dist-info}/METADATA +39 -65
- {camel_ai-0.2.0.dist-info → camel_ai-0.2.3.dist-info}/RECORD +98 -86
- {camel_ai-0.2.0.dist-info → camel_ai-0.2.3.dist-info}/WHEEL +1 -1
- camel/models/open_source_model.py +0 -170
- camel/workforce/manager_node.py +0 -299
- camel/workforce/role_playing_node.py +0 -168
- camel/workforce/workforce_prompt.py +0 -125
camel/bots/telegram_bot.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
import os
|
|
15
|
+
from typing import TYPE_CHECKING, Optional
|
|
16
|
+
|
|
17
|
+
from camel.agents import ChatAgent
|
|
18
|
+
from camel.messages import BaseMessage
|
|
19
|
+
from camel.utils import dependencies_required
|
|
20
|
+
|
|
21
|
+
# Conditionally import telebot types only for type checking
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from telebot.types import ( # type: ignore[import-untyped]
|
|
24
|
+
Message,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TelegramBot:
|
|
29
|
+
r"""Represents a Telegram bot that is powered by an agent.
|
|
30
|
+
|
|
31
|
+
Attributes:
|
|
32
|
+
chat_agent (ChatAgent): Chat agent that will power the bot.
|
|
33
|
+
telegram_token (str, optional): The bot token.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
@dependencies_required('telebot')
|
|
37
|
+
def __init__(
|
|
38
|
+
self,
|
|
39
|
+
chat_agent: ChatAgent,
|
|
40
|
+
telegram_token: Optional[str] = None,
|
|
41
|
+
) -> None:
|
|
42
|
+
self.chat_agent = chat_agent
|
|
43
|
+
|
|
44
|
+
if not telegram_token:
|
|
45
|
+
self.token = os.getenv('TELEGRAM_TOKEN')
|
|
46
|
+
if not self.token:
|
|
47
|
+
raise ValueError(
|
|
48
|
+
"`TELEGRAM_TOKEN` not found in environment variables. "
|
|
49
|
+
"Get it from t.me/BotFather."
|
|
50
|
+
)
|
|
51
|
+
else:
|
|
52
|
+
self.token = telegram_token
|
|
53
|
+
|
|
54
|
+
import telebot # type: ignore[import-untyped]
|
|
55
|
+
|
|
56
|
+
self.bot = telebot.TeleBot(token=self.token)
|
|
57
|
+
|
|
58
|
+
# Register the message handler within the constructor
|
|
59
|
+
self.bot.message_handler(func=lambda message: True)(self.on_message)
|
|
60
|
+
|
|
61
|
+
def run(self) -> None:
|
|
62
|
+
r"""Start the Telegram bot."""
|
|
63
|
+
print("Telegram bot is running...")
|
|
64
|
+
self.bot.infinity_polling()
|
|
65
|
+
|
|
66
|
+
def on_message(self, message: 'Message') -> None:
|
|
67
|
+
r"""Handles incoming messages from the user.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
message (types.Message): The incoming message object.
|
|
71
|
+
"""
|
|
72
|
+
self.chat_agent.reset()
|
|
73
|
+
|
|
74
|
+
if not message.text:
|
|
75
|
+
return
|
|
76
|
+
|
|
77
|
+
user_msg = BaseMessage.make_user_message(
|
|
78
|
+
role_name="User", content=message.text
|
|
79
|
+
)
|
|
80
|
+
assistant_response = self.chat_agent.step(user_msg)
|
|
81
|
+
|
|
82
|
+
self.bot.reply_to(message, assistant_response.msg.content)
|
camel/configs/__init__.py
CHANGED
|
@@ -18,7 +18,7 @@ from .groq_config import GROQ_API_PARAMS, GroqConfig
|
|
|
18
18
|
from .litellm_config import LITELLM_API_PARAMS, LiteLLMConfig
|
|
19
19
|
from .mistral_config import MISTRAL_API_PARAMS, MistralConfig
|
|
20
20
|
from .ollama_config import OLLAMA_API_PARAMS, OllamaConfig
|
|
21
|
-
from .openai_config import OPENAI_API_PARAMS, ChatGPTConfig, OpenSourceConfig
|
|
21
|
+
from .openai_config import OPENAI_API_PARAMS, ChatGPTConfig
|
|
22
22
|
from .reka_config import REKA_API_PARAMS, RekaConfig
|
|
23
23
|
from .samba_config import (
|
|
24
24
|
SAMBA_CLOUD_API_PARAMS,
|
|
@@ -40,7 +40,6 @@ __all__ = [
|
|
|
40
40
|
'ANTHROPIC_API_PARAMS',
|
|
41
41
|
'GROQ_API_PARAMS',
|
|
42
42
|
'GroqConfig',
|
|
43
|
-
'OpenSourceConfig',
|
|
44
43
|
'LiteLLMConfig',
|
|
45
44
|
'LITELLM_API_PARAMS',
|
|
46
45
|
'OllamaConfig',
|
camel/configs/anthropic_config.py
CHANGED
|
@@ -15,9 +15,8 @@ from __future__ import annotations
|
|
|
15
15
|
|
|
16
16
|
from typing import List, Union
|
|
17
17
|
|
|
18
|
-
from anthropic import NOT_GIVEN, NotGiven
|
|
19
|
-
|
|
20
18
|
from camel.configs.base_config import BaseConfig
|
|
19
|
+
from camel.types import NOT_GIVEN, NotGiven
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class AnthropicConfig(BaseConfig):
|
|
@@ -55,9 +54,7 @@ class AnthropicConfig(BaseConfig):
|
|
|
55
54
|
(default: :obj:`5`)
|
|
56
55
|
metadata: An object describing metadata about the request.
|
|
57
56
|
stream (bool, optional): Whether to incrementally stream the response
|
|
58
|
-
|
|
59
|
-
(default: :obj:`False`)
|
|
60
|
-
|
|
57
|
+
using server-sent events. (default: :obj:`False`)
|
|
61
58
|
"""
|
|
62
59
|
|
|
63
60
|
max_tokens: int = 256
|
camel/configs/base_config.py
CHANGED
|
@@ -39,13 +39,13 @@ class BaseConfig(ABC, BaseModel):
|
|
|
39
39
|
@classmethod
|
|
40
40
|
def fields_type_checking(cls, tools):
|
|
41
41
|
if tools is not None:
|
|
42
|
-
from camel.toolkits import OpenAIFunction
|
|
42
|
+
from camel.toolkits import FunctionTool
|
|
43
43
|
|
|
44
44
|
for tool in tools:
|
|
45
|
-
if not isinstance(tool, OpenAIFunction):
|
|
45
|
+
if not isinstance(tool, FunctionTool):
|
|
46
46
|
raise ValueError(
|
|
47
47
|
f"The tool {tool} should "
|
|
48
|
-
"be an instance of `OpenAIFunction`."
|
|
48
|
+
"be an instance of `FunctionTool`."
|
|
49
49
|
)
|
|
50
50
|
return tools
|
|
51
51
|
|
|
@@ -54,14 +54,14 @@ class BaseConfig(ABC, BaseModel):
|
|
|
54
54
|
|
|
55
55
|
tools_schema = None
|
|
56
56
|
if self.tools:
|
|
57
|
-
from camel.toolkits import OpenAIFunction
|
|
57
|
+
from camel.toolkits import FunctionTool
|
|
58
58
|
|
|
59
59
|
tools_schema = []
|
|
60
60
|
for tool in self.tools:
|
|
61
|
-
if not isinstance(tool, OpenAIFunction):
|
|
61
|
+
if not isinstance(tool, FunctionTool):
|
|
62
62
|
raise ValueError(
|
|
63
63
|
f"The tool {tool} should "
|
|
64
|
-
"be an instance of `OpenAIFunction`."
|
|
64
|
+
"be an instance of `FunctionTool`."
|
|
65
65
|
)
|
|
66
66
|
tools_schema.append(tool.get_openai_tool_schema())
|
|
67
67
|
config_dict["tools"] = tools_schema
|
camel/configs/gemini_config.py
CHANGED
|
@@ -86,7 +86,7 @@ class GeminiConfig(BaseConfig):
|
|
|
86
86
|
|
|
87
87
|
@model_validator(mode="before")
|
|
88
88
|
@classmethod
|
|
89
|
-
def
|
|
89
|
+
def model_type_checking(cls, data: Any):
|
|
90
90
|
if isinstance(data, dict):
|
|
91
91
|
response_schema = data.get("response_schema")
|
|
92
92
|
safety_settings = data.get("safety_settings")
|
camel/configs/groq_config.py
CHANGED
|
@@ -15,9 +15,8 @@ from __future__ import annotations
|
|
|
15
15
|
|
|
16
16
|
from typing import Optional, Sequence, Union
|
|
17
17
|
|
|
18
|
-
from openai._types import NOT_GIVEN, NotGiven
|
|
19
|
-
|
|
20
18
|
from camel.configs.base_config import BaseConfig
|
|
19
|
+
from camel.types import NOT_GIVEN, NotGiven
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class GroqConfig(BaseConfig):
|
|
@@ -73,7 +72,7 @@ class GroqConfig(BaseConfig):
|
|
|
73
72
|
user (str, optional): A unique identifier representing your end-user,
|
|
74
73
|
which can help OpenAI to monitor and detect abuse.
|
|
75
74
|
(default: :obj:`""`)
|
|
76
|
-
tools (list[OpenAIFunction], optional): A list of tools the model may
|
|
75
|
+
tools (list[FunctionTool], optional): A list of tools the model may
|
|
77
76
|
call. Currently, only functions are supported as a tool. Use this
|
|
78
77
|
to provide a list of functions the model may generate JSON inputs
|
|
79
78
|
for. A max of 128 functions are supported.
|
camel/configs/ollama_config.py
CHANGED
|
@@ -15,9 +15,8 @@ from __future__ import annotations
|
|
|
15
15
|
|
|
16
16
|
from typing import Sequence, Union
|
|
17
17
|
|
|
18
|
-
from openai._types import NOT_GIVEN, NotGiven
|
|
19
|
-
|
|
20
18
|
from camel.configs.base_config import BaseConfig
|
|
19
|
+
from camel.types import NOT_GIVEN, NotGiven
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class OllamaConfig(BaseConfig):
|
camel/configs/openai_config.py
CHANGED
|
@@ -15,10 +15,10 @@ from __future__ import annotations
|
|
|
15
15
|
|
|
16
16
|
from typing import Optional, Sequence, Union
|
|
17
17
|
|
|
18
|
-
from openai._types import NOT_GIVEN, NotGiven
|
|
19
18
|
from pydantic import Field
|
|
20
19
|
|
|
21
20
|
from camel.configs.base_config import BaseConfig
|
|
21
|
+
from camel.types import NOT_GIVEN, NotGiven
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class ChatGPTConfig(BaseConfig):
|
|
@@ -81,7 +81,7 @@ class ChatGPTConfig(BaseConfig):
|
|
|
81
81
|
user (str, optional): A unique identifier representing your end-user,
|
|
82
82
|
which can help OpenAI to monitor and detect abuse.
|
|
83
83
|
(default: :obj:`""`)
|
|
84
|
-
tools (list[OpenAIFunction], optional): A list of tools the model may
|
|
84
|
+
tools (list[FunctionTool], optional): A list of tools the model may
|
|
85
85
|
call. Currently, only functions are supported as a tool. Use this
|
|
86
86
|
to provide a list of functions the model may generate JSON inputs
|
|
87
87
|
for. A max of 128 functions are supported.
|
|
@@ -112,24 +112,3 @@ class ChatGPTConfig(BaseConfig):
|
|
|
112
112
|
|
|
113
113
|
|
|
114
114
|
OPENAI_API_PARAMS = {param for param in ChatGPTConfig.model_fields.keys()}
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
class OpenSourceConfig(BaseConfig):
|
|
118
|
-
r"""Defines parameters for setting up open-source models and includes
|
|
119
|
-
parameters to be passed to chat completion function of OpenAI API.
|
|
120
|
-
|
|
121
|
-
Args:
|
|
122
|
-
model_path (str): The path to a local folder containing the model
|
|
123
|
-
files or the model card in HuggingFace hub.
|
|
124
|
-
server_url (str): The URL to the server running the model inference
|
|
125
|
-
which will be used as the API base of OpenAI API.
|
|
126
|
-
api_params (ChatGPTConfig): An instance of :obj:ChatGPTConfig to
|
|
127
|
-
contain the arguments to be passed to OpenAI API.
|
|
128
|
-
"""
|
|
129
|
-
|
|
130
|
-
# Maybe the param needs to be renamed.
|
|
131
|
-
# Warning: Field "model_path" has conflict with protected namespace
|
|
132
|
-
# "model_".
|
|
133
|
-
model_path: str
|
|
134
|
-
server_url: str
|
|
135
|
-
api_params: ChatGPTConfig = Field(default_factory=ChatGPTConfig)
|
camel/configs/samba_config.py
CHANGED
|
@@ -15,10 +15,10 @@ from __future__ import annotations
|
|
|
15
15
|
|
|
16
16
|
from typing import Any, Dict, Optional, Sequence, Union
|
|
17
17
|
|
|
18
|
-
from openai._types import NOT_GIVEN, NotGiven
|
|
19
18
|
from pydantic import Field
|
|
20
19
|
|
|
21
20
|
from camel.configs.base_config import BaseConfig
|
|
21
|
+
from camel.types import NOT_GIVEN, NotGiven
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class SambaFastAPIConfig(BaseConfig):
|
|
@@ -172,7 +172,7 @@ class SambaCloudAPIConfig(BaseConfig):
|
|
|
172
172
|
user (str, optional): A unique identifier representing your end-user,
|
|
173
173
|
which can help OpenAI to monitor and detect abuse.
|
|
174
174
|
(default: :obj:`""`)
|
|
175
|
-
tools (list[OpenAIFunction], optional): A list of tools the model may
|
|
175
|
+
tools (list[FunctionTool], optional): A list of tools the model may
|
|
176
176
|
call. Currently, only functions are supported as a tool. Use this
|
|
177
177
|
to provide a list of functions the model may generate JSON inputs
|
|
178
178
|
for. A max of 128 functions are supported.
|
camel/configs/togetherai_config.py
CHANGED
|
@@ -15,10 +15,10 @@ from __future__ import annotations
|
|
|
15
15
|
|
|
16
16
|
from typing import Any, Sequence, Union
|
|
17
17
|
|
|
18
|
-
from openai._types import NOT_GIVEN, NotGiven
|
|
19
18
|
from pydantic import Field
|
|
20
19
|
|
|
21
20
|
from camel.configs.base_config import BaseConfig
|
|
21
|
+
from camel.types import NOT_GIVEN, NotGiven
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class TogetherAIConfig(BaseConfig):
|
camel/configs/vllm_config.py
CHANGED
|
@@ -15,10 +15,10 @@ from __future__ import annotations
|
|
|
15
15
|
|
|
16
16
|
from typing import Sequence, Union
|
|
17
17
|
|
|
18
|
-
from openai._types import NOT_GIVEN, NotGiven
|
|
19
18
|
from pydantic import Field
|
|
20
19
|
|
|
21
20
|
from camel.configs.base_config import BaseConfig
|
|
21
|
+
from camel.types import NOT_GIVEN, NotGiven
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
# flake8: noqa: E501
|
camel/configs/zhipuai_config.py
CHANGED
|
@@ -15,9 +15,8 @@ from __future__ import annotations
|
|
|
15
15
|
|
|
16
16
|
from typing import Optional, Sequence, Union
|
|
17
17
|
|
|
18
|
-
from openai._types import NOT_GIVEN, NotGiven
|
|
19
|
-
|
|
20
18
|
from camel.configs.base_config import BaseConfig
|
|
19
|
+
from camel.types import NOT_GIVEN, NotGiven
|
|
21
20
|
|
|
22
21
|
|
|
23
22
|
class ZhipuAIConfig(BaseConfig):
|
|
@@ -45,7 +44,7 @@ class ZhipuAIConfig(BaseConfig):
|
|
|
45
44
|
in the chat completion. The total length of input tokens and
|
|
46
45
|
generated tokens is limited by the model's context length.
|
|
47
46
|
(default: :obj:`None`)
|
|
48
|
-
tools (list[OpenAIFunction], optional): A list of tools the model may
|
|
47
|
+
tools (list[FunctionTool], optional): A list of tools the model may
|
|
49
48
|
call. Currently, only functions are supported as a tool. Use this
|
|
50
49
|
to provide a list of functions the model may generate JSON inputs
|
|
51
50
|
for. A max of 128 functions are supported.
|
camel/embeddings/openai_embedding.py
CHANGED
|
@@ -16,10 +16,10 @@ from __future__ import annotations
|
|
|
16
16
|
import os
|
|
17
17
|
from typing import Any
|
|
18
18
|
|
|
19
|
-
from openai import NOT_GIVEN, NotGiven, OpenAI
|
|
19
|
+
from openai import OpenAI
|
|
20
20
|
|
|
21
21
|
from camel.embeddings.base import BaseEmbedding
|
|
22
|
-
from camel.types import EmbeddingModelType
|
|
22
|
+
from camel.types import NOT_GIVEN, EmbeddingModelType, NotGiven
|
|
23
23
|
from camel.utils import api_keys_required
|
|
24
24
|
|
|
25
25
|
|
camel/loaders/__init__.py
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
14
|
|
|
15
15
|
from .base_io import File
|
|
16
|
+
from .chunkr_reader import ChunkrReader
|
|
16
17
|
from .firecrawl_reader import Firecrawl
|
|
17
18
|
from .jina_url_reader import JinaURLReader
|
|
18
19
|
from .unstructured_io import UnstructuredIO
|
|
@@ -22,4 +23,5 @@ __all__ = [
|
|
|
22
23
|
'UnstructuredIO',
|
|
23
24
|
'JinaURLReader',
|
|
24
25
|
'Firecrawl',
|
|
26
|
+
'ChunkrReader',
|
|
25
27
|
]
|
camel/loaders/chunkr_reader.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
import time
|
|
19
|
+
from typing import IO, Any, Optional, Union
|
|
20
|
+
|
|
21
|
+
import requests
|
|
22
|
+
|
|
23
|
+
from camel.utils import api_keys_required
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ChunkrReader:
|
|
29
|
+
r"""Chunkr Reader for processing documents and returning content
|
|
30
|
+
in various formats.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
api_key (Optional[str], optional): The API key for Chunkr API. If not
|
|
34
|
+
provided, it will be retrieved from the environment variable
|
|
35
|
+
`CHUNKR_API_KEY`. (default: :obj:`None`)
|
|
36
|
+
url (Optional[str], optional): The url to the Chunkr service.
|
|
37
|
+
(default: :obj:`https://api.chunkr.ai/api/v1/task`)
|
|
38
|
+
timeout (int, optional): The maximum time in seconds to wait for the
|
|
39
|
+
API responses. (default: :obj:`30`)
|
|
40
|
+
**kwargs (Any): Additional keyword arguments for request headers.
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
@api_keys_required("CHUNKR_API_KEY")
|
|
44
|
+
def __init__(
|
|
45
|
+
self,
|
|
46
|
+
api_key: Optional[str] = None,
|
|
47
|
+
url: Optional[str] = "https://api.chunkr.ai/api/v1/task",
|
|
48
|
+
timeout: int = 30,
|
|
49
|
+
**kwargs: Any,
|
|
50
|
+
) -> None:
|
|
51
|
+
self._api_key = api_key or os.getenv('CHUNKR_API_KEY')
|
|
52
|
+
self._url = os.getenv('CHUNKR_API_URL') or url
|
|
53
|
+
self._headers = {
|
|
54
|
+
"Authorization": f"{self._api_key}",
|
|
55
|
+
**kwargs,
|
|
56
|
+
}
|
|
57
|
+
self.timeout = timeout
|
|
58
|
+
|
|
59
|
+
def submit_task(
|
|
60
|
+
self,
|
|
61
|
+
file_path: str,
|
|
62
|
+
model: str = "Fast",
|
|
63
|
+
ocr_strategy: str = "Auto",
|
|
64
|
+
target_chunk_length: str = "512",
|
|
65
|
+
) -> str:
|
|
66
|
+
r"""Submits a file to the Chunkr API and returns the task ID.
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
file_path (str): The path to the file to be uploaded.
|
|
70
|
+
model (str, optional): The model to be used for the task.
|
|
71
|
+
(default: :obj:`Fast`)
|
|
72
|
+
ocr_strategy (str, optional): The OCR strategy. Defaults to 'Auto'.
|
|
73
|
+
target_chunk_length (str, optional): The target chunk length.
|
|
74
|
+
(default: :obj:`512`)
|
|
75
|
+
|
|
76
|
+
Returns:
|
|
77
|
+
str: The task ID.
|
|
78
|
+
"""
|
|
79
|
+
with open(file_path, 'rb') as file:
|
|
80
|
+
files: dict[
|
|
81
|
+
str, Union[tuple[None, IO[bytes]], tuple[None, str]]
|
|
82
|
+
] = {
|
|
83
|
+
'file': (
|
|
84
|
+
None,
|
|
85
|
+
file,
|
|
86
|
+
), # Properly pass the file as a binary stream
|
|
87
|
+
'model': (None, model),
|
|
88
|
+
'ocr_strategy': (None, ocr_strategy),
|
|
89
|
+
'target_chunk_length': (None, target_chunk_length),
|
|
90
|
+
}
|
|
91
|
+
try:
|
|
92
|
+
response = requests.post(
|
|
93
|
+
self._url, # type: ignore[arg-type]
|
|
94
|
+
headers=self._headers,
|
|
95
|
+
files=files,
|
|
96
|
+
timeout=self.timeout,
|
|
97
|
+
)
|
|
98
|
+
response.raise_for_status()
|
|
99
|
+
task_id = response.json().get('task_id')
|
|
100
|
+
if not task_id:
|
|
101
|
+
raise ValueError("Task ID not returned in the response.")
|
|
102
|
+
logger.info(f"Task submitted successfully. Task ID: {task_id}")
|
|
103
|
+
return task_id
|
|
104
|
+
except Exception as e:
|
|
105
|
+
logger.error(f"Failed to submit task: {e}")
|
|
106
|
+
raise ValueError(f"Failed to submit task: {e}") from e
|
|
107
|
+
|
|
108
|
+
def get_task_output(self, task_id: str, max_retries: int = 5) -> str:
|
|
109
|
+
r"""Polls the Chunkr API to check the task status and returns the task
|
|
110
|
+
result.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
task_id (str): The task ID to check the status for.
|
|
114
|
+
max_retries (int, optional): Maximum number of retry attempts.
|
|
115
|
+
(default: :obj:`5`)
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
str: The formatted task result in JSON format.
|
|
119
|
+
|
|
120
|
+
Raises:
|
|
121
|
+
ValueError: If the task status cannot be retrieved.
|
|
122
|
+
RuntimeError: If the maximum number of retries is reached without
|
|
123
|
+
a successful task completion.
|
|
124
|
+
"""
|
|
125
|
+
url_get = f"{self._url}/{task_id}"
|
|
126
|
+
attempts = 0
|
|
127
|
+
|
|
128
|
+
while attempts < max_retries:
|
|
129
|
+
try:
|
|
130
|
+
response = requests.get(
|
|
131
|
+
url_get, headers=self._headers, timeout=self.timeout
|
|
132
|
+
)
|
|
133
|
+
response.raise_for_status()
|
|
134
|
+
task_status = response.json().get('status')
|
|
135
|
+
|
|
136
|
+
if task_status == "Succeeded":
|
|
137
|
+
logger.info(f"Task {task_id} completed successfully.")
|
|
138
|
+
return self._pretty_print_response(response.json())
|
|
139
|
+
else:
|
|
140
|
+
logger.info(
|
|
141
|
+
f"Task {task_id} is still {task_status}. Retrying "
|
|
142
|
+
"in 5 seconds..."
|
|
143
|
+
)
|
|
144
|
+
except Exception as e:
|
|
145
|
+
logger.error(f"Failed to retrieve task status: {e}")
|
|
146
|
+
raise ValueError(f"Failed to retrieve task status: {e}") from e
|
|
147
|
+
|
|
148
|
+
attempts += 1
|
|
149
|
+
time.sleep(5)
|
|
150
|
+
|
|
151
|
+
logger.error(f"Max retries reached for task {task_id}.")
|
|
152
|
+
raise RuntimeError(f"Max retries reached for task {task_id}.")
|
|
153
|
+
|
|
154
|
+
def _pretty_print_response(self, response_json: dict) -> str:
|
|
155
|
+
r"""Pretty prints the JSON response.
|
|
156
|
+
|
|
157
|
+
Args:
|
|
158
|
+
response_json (dict): The response JSON to pretty print.
|
|
159
|
+
|
|
160
|
+
Returns:
|
|
161
|
+
str: Formatted JSON as a string.
|
|
162
|
+
"""
|
|
163
|
+
return json.dumps(response_json, indent=4)
|
camel/loaders/firecrawl_reader.py
CHANGED
|
@@ -49,7 +49,6 @@ class Firecrawl:
|
|
|
49
49
|
self,
|
|
50
50
|
url: str,
|
|
51
51
|
params: Optional[Dict[str, Any]] = None,
|
|
52
|
-
wait_until_done: bool = True,
|
|
53
52
|
**kwargs: Any,
|
|
54
53
|
) -> Any:
|
|
55
54
|
r"""Crawl a URL and all accessible subpages. Customize the crawl by
|
|
@@ -60,14 +59,12 @@ class Firecrawl:
|
|
|
60
59
|
url (str): The URL to crawl.
|
|
61
60
|
params (Optional[Dict[str, Any]]): Additional parameters for the
|
|
62
61
|
crawl request. Defaults to `None`.
|
|
63
|
-
wait_until_done (bool): Whether to wait until the crawl job is
|
|
64
|
-
completed. Defaults to `True`.
|
|
65
62
|
**kwargs (Any): Additional keyword arguments, such as
|
|
66
|
-
`poll_interval`, `idempotency_key
|
|
63
|
+
`poll_interval`, `idempotency_key`.
|
|
67
64
|
|
|
68
65
|
Returns:
|
|
69
|
-
Any: The
|
|
70
|
-
|
|
66
|
+
Any: The crawl job ID or the crawl results if waiting until
|
|
67
|
+
completion.
|
|
71
68
|
|
|
72
69
|
Raises:
|
|
73
70
|
RuntimeError: If the crawling process fails.
|
|
@@ -78,13 +75,8 @@ class Firecrawl:
|
|
|
78
75
|
url=url,
|
|
79
76
|
params=params,
|
|
80
77
|
**kwargs,
|
|
81
|
-
wait_until_done=wait_until_done,
|
|
82
|
-
)
|
|
83
|
-
return (
|
|
84
|
-
crawl_response
|
|
85
|
-
if wait_until_done
|
|
86
|
-
else crawl_response.get("jobId")
|
|
87
78
|
)
|
|
79
|
+
return crawl_response
|
|
88
80
|
except Exception as e:
|
|
89
81
|
raise RuntimeError(f"Failed to crawl the URL: {e}")
|
|
90
82
|
|
|
@@ -103,7 +95,10 @@ class Firecrawl:
|
|
|
103
95
|
"""
|
|
104
96
|
|
|
105
97
|
try:
|
|
106
|
-
crawl_result = self.app.crawl_url(
|
|
98
|
+
crawl_result = self.app.crawl_url(
|
|
99
|
+
url,
|
|
100
|
+
{'formats': ['markdown']},
|
|
101
|
+
)
|
|
107
102
|
if not isinstance(crawl_result, list):
|
|
108
103
|
raise ValueError("Unexpected response format")
|
|
109
104
|
markdown_contents = [
|
|
@@ -160,12 +155,12 @@ class Firecrawl:
|
|
|
160
155
|
except Exception as e:
|
|
161
156
|
raise RuntimeError(f"Failed to scrape the URL: {e}")
|
|
162
157
|
|
|
163
|
-
def structured_scrape(self, url: str, output_schema: BaseModel) -> Dict:
|
|
158
|
+
def structured_scrape(self, url: str, response_format: BaseModel) -> Dict:
|
|
164
159
|
r"""Use LLM to extract structured data from given URL.
|
|
165
160
|
|
|
166
161
|
Args:
|
|
167
162
|
url (str): The URL to read.
|
|
168
|
-
|
|
163
|
+
response_format (BaseModel): A pydantic model
|
|
169
164
|
that includes value types and field descriptions used to
|
|
170
165
|
generate a structured response by LLM. This schema helps
|
|
171
166
|
in defining the expected output format.
|
|
@@ -180,41 +175,14 @@ class Firecrawl:
|
|
|
180
175
|
data = self.app.scrape_url(
|
|
181
176
|
url,
|
|
182
177
|
{
|
|
183
|
-
'
|
|
184
|
-
|
|
185
|
-
"extractionPrompt": "Based on the information on "
|
|
186
|
-
"the page, extract the information from the schema.",
|
|
187
|
-
'extractionSchema': output_schema.model_json_schema(),
|
|
188
|
-
},
|
|
189
|
-
'pageOptions': {'onlyMainContent': True},
|
|
178
|
+
'formats': ['extract'],
|
|
179
|
+
'extract': {'schema': response_format.model_json_schema()},
|
|
190
180
|
},
|
|
191
181
|
)
|
|
192
|
-
return data.get("
|
|
182
|
+
return data.get("extract", {})
|
|
193
183
|
except Exception as e:
|
|
194
184
|
raise RuntimeError(f"Failed to perform structured scrape: {e}")
|
|
195
185
|
|
|
196
|
-
def tidy_scrape(self, url: str) -> str:
|
|
197
|
-
r"""Only return the main content of the page, excluding headers,
|
|
198
|
-
navigation bars, footers, etc. in Markdown format.
|
|
199
|
-
|
|
200
|
-
Args:
|
|
201
|
-
url (str): The URL to read.
|
|
202
|
-
|
|
203
|
-
Returns:
|
|
204
|
-
str: The markdown content of the URL.
|
|
205
|
-
|
|
206
|
-
Raises:
|
|
207
|
-
RuntimeError: If the scrape process fails.
|
|
208
|
-
"""
|
|
209
|
-
|
|
210
|
-
try:
|
|
211
|
-
scrape_result = self.app.scrape_url(
|
|
212
|
-
url, {'pageOptions': {'onlyMainContent': True}}
|
|
213
|
-
)
|
|
214
|
-
return scrape_result.get("markdown", "")
|
|
215
|
-
except Exception as e:
|
|
216
|
-
raise RuntimeError(f"Failed to perform tidy scrape: {e}")
|
|
217
|
-
|
|
218
186
|
def map_site(
|
|
219
187
|
self, url: str, params: Optional[Dict[str, Any]] = None
|
|
220
188
|
) -> list:
|