camel-ai 0.1.9__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff compares the contents of two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (102)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +334 -113
  3. camel/agents/knowledge_graph_agent.py +4 -6
  4. camel/bots/__init__.py +34 -0
  5. camel/bots/discord_app.py +138 -0
  6. camel/bots/slack/__init__.py +30 -0
  7. camel/bots/slack/models.py +158 -0
  8. camel/bots/slack/slack_app.py +255 -0
  9. camel/bots/telegram_bot.py +82 -0
  10. camel/configs/__init__.py +1 -2
  11. camel/configs/anthropic_config.py +2 -5
  12. camel/configs/base_config.py +6 -6
  13. camel/configs/gemini_config.py +1 -1
  14. camel/configs/groq_config.py +2 -3
  15. camel/configs/ollama_config.py +1 -2
  16. camel/configs/openai_config.py +2 -23
  17. camel/configs/samba_config.py +2 -2
  18. camel/configs/togetherai_config.py +1 -1
  19. camel/configs/vllm_config.py +1 -1
  20. camel/configs/zhipuai_config.py +2 -3
  21. camel/embeddings/openai_embedding.py +2 -2
  22. camel/loaders/__init__.py +2 -0
  23. camel/loaders/chunkr_reader.py +163 -0
  24. camel/loaders/firecrawl_reader.py +13 -45
  25. camel/loaders/unstructured_io.py +65 -29
  26. camel/messages/__init__.py +1 -0
  27. camel/messages/func_message.py +2 -2
  28. camel/models/__init__.py +2 -4
  29. camel/models/anthropic_model.py +32 -26
  30. camel/models/azure_openai_model.py +39 -36
  31. camel/models/base_model.py +31 -20
  32. camel/models/gemini_model.py +37 -29
  33. camel/models/groq_model.py +29 -23
  34. camel/models/litellm_model.py +44 -61
  35. camel/models/mistral_model.py +33 -30
  36. camel/models/model_factory.py +66 -76
  37. camel/models/nemotron_model.py +33 -23
  38. camel/models/ollama_model.py +42 -47
  39. camel/models/{openai_compatibility_model.py → openai_compatible_model.py} +36 -41
  40. camel/models/openai_model.py +60 -25
  41. camel/models/reka_model.py +30 -28
  42. camel/models/samba_model.py +82 -177
  43. camel/models/stub_model.py +2 -2
  44. camel/models/togetherai_model.py +37 -43
  45. camel/models/vllm_model.py +43 -50
  46. camel/models/zhipuai_model.py +33 -27
  47. camel/retrievers/auto_retriever.py +28 -10
  48. camel/retrievers/vector_retriever.py +72 -44
  49. camel/societies/babyagi_playing.py +6 -3
  50. camel/societies/role_playing.py +17 -3
  51. camel/storages/__init__.py +2 -0
  52. camel/storages/graph_storages/__init__.py +2 -0
  53. camel/storages/graph_storages/graph_element.py +3 -5
  54. camel/storages/graph_storages/nebula_graph.py +547 -0
  55. camel/storages/key_value_storages/json.py +6 -1
  56. camel/tasks/task.py +11 -4
  57. camel/tasks/task_prompt.py +4 -0
  58. camel/toolkits/__init__.py +28 -24
  59. camel/toolkits/arxiv_toolkit.py +155 -0
  60. camel/toolkits/ask_news_toolkit.py +653 -0
  61. camel/toolkits/base.py +2 -3
  62. camel/toolkits/code_execution.py +6 -7
  63. camel/toolkits/dalle_toolkit.py +6 -6
  64. camel/toolkits/{openai_function.py → function_tool.py} +34 -11
  65. camel/toolkits/github_toolkit.py +9 -10
  66. camel/toolkits/google_maps_toolkit.py +7 -14
  67. camel/toolkits/google_scholar_toolkit.py +146 -0
  68. camel/toolkits/linkedin_toolkit.py +7 -10
  69. camel/toolkits/math_toolkit.py +8 -8
  70. camel/toolkits/open_api_toolkit.py +5 -8
  71. camel/toolkits/reddit_toolkit.py +7 -10
  72. camel/toolkits/retrieval_toolkit.py +5 -9
  73. camel/toolkits/search_toolkit.py +9 -9
  74. camel/toolkits/slack_toolkit.py +11 -14
  75. camel/toolkits/twitter_toolkit.py +377 -454
  76. camel/toolkits/weather_toolkit.py +6 -6
  77. camel/toolkits/whatsapp_toolkit.py +177 -0
  78. camel/types/__init__.py +6 -1
  79. camel/types/enums.py +43 -85
  80. camel/types/openai_types.py +3 -0
  81. camel/types/unified_model_type.py +104 -0
  82. camel/utils/__init__.py +0 -2
  83. camel/utils/async_func.py +7 -7
  84. camel/utils/commons.py +40 -4
  85. camel/utils/token_counting.py +38 -214
  86. camel/workforce/__init__.py +6 -6
  87. camel/workforce/base.py +9 -5
  88. camel/workforce/prompts.py +179 -0
  89. camel/workforce/role_playing_worker.py +181 -0
  90. camel/workforce/{single_agent_node.py → single_agent_worker.py} +49 -23
  91. camel/workforce/task_channel.py +7 -8
  92. camel/workforce/utils.py +20 -50
  93. camel/workforce/{worker_node.py → worker.py} +15 -12
  94. camel/workforce/workforce.py +456 -19
  95. camel_ai-0.2.3.dist-info/LICENSE +201 -0
  96. {camel_ai-0.1.9.dist-info → camel_ai-0.2.3.dist-info}/METADATA +40 -65
  97. {camel_ai-0.1.9.dist-info → camel_ai-0.2.3.dist-info}/RECORD +98 -86
  98. {camel_ai-0.1.9.dist-info → camel_ai-0.2.3.dist-info}/WHEEL +1 -1
  99. camel/models/open_source_model.py +0 -170
  100. camel/workforce/manager_node.py +0 -299
  101. camel/workforce/role_playing_node.py +0 -168
  102. camel/workforce/workforce_prompt.py +0 -125
@@ -0,0 +1,82 @@
1
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2
+ # Licensed under the Apache License, Version 2.0 (the “License”);
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an “AS IS” BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
+ import os
15
+ from typing import TYPE_CHECKING, Optional
16
+
17
+ from camel.agents import ChatAgent
18
+ from camel.messages import BaseMessage
19
+ from camel.utils import dependencies_required
20
+
21
+ # Conditionally import telebot types only for type checking
22
+ if TYPE_CHECKING:
23
+ from telebot.types import ( # type: ignore[import-untyped]
24
+ Message,
25
+ )
26
+
27
+
28
+ class TelegramBot:
29
+ r"""Represents a Telegram bot that is powered by an agent.
30
+
31
+ Attributes:
32
+ chat_agent (ChatAgent): Chat agent that will power the bot.
33
+ telegram_token (str, optional): The bot token.
34
+ """
35
+
36
+ @dependencies_required('telebot')
37
+ def __init__(
38
+ self,
39
+ chat_agent: ChatAgent,
40
+ telegram_token: Optional[str] = None,
41
+ ) -> None:
42
+ self.chat_agent = chat_agent
43
+
44
+ if not telegram_token:
45
+ self.token = os.getenv('TELEGRAM_TOKEN')
46
+ if not self.token:
47
+ raise ValueError(
48
+ "`TELEGRAM_TOKEN` not found in environment variables. "
49
+ "Get it from t.me/BotFather."
50
+ )
51
+ else:
52
+ self.token = telegram_token
53
+
54
+ import telebot # type: ignore[import-untyped]
55
+
56
+ self.bot = telebot.TeleBot(token=self.token)
57
+
58
+ # Register the message handler within the constructor
59
+ self.bot.message_handler(func=lambda message: True)(self.on_message)
60
+
61
+ def run(self) -> None:
62
+ r"""Start the Telegram bot."""
63
+ print("Telegram bot is running...")
64
+ self.bot.infinity_polling()
65
+
66
+ def on_message(self, message: 'Message') -> None:
67
+ r"""Handles incoming messages from the user.
68
+
69
+ Args:
70
+ message (types.Message): The incoming message object.
71
+ """
72
+ self.chat_agent.reset()
73
+
74
+ if not message.text:
75
+ return
76
+
77
+ user_msg = BaseMessage.make_user_message(
78
+ role_name="User", content=message.text
79
+ )
80
+ assistant_response = self.chat_agent.step(user_msg)
81
+
82
+ self.bot.reply_to(message, assistant_response.msg.content)
camel/configs/__init__.py CHANGED
@@ -18,7 +18,7 @@ from .groq_config import GROQ_API_PARAMS, GroqConfig
18
18
  from .litellm_config import LITELLM_API_PARAMS, LiteLLMConfig
19
19
  from .mistral_config import MISTRAL_API_PARAMS, MistralConfig
20
20
  from .ollama_config import OLLAMA_API_PARAMS, OllamaConfig
21
- from .openai_config import OPENAI_API_PARAMS, ChatGPTConfig, OpenSourceConfig
21
+ from .openai_config import OPENAI_API_PARAMS, ChatGPTConfig
22
22
  from .reka_config import REKA_API_PARAMS, RekaConfig
23
23
  from .samba_config import (
24
24
  SAMBA_CLOUD_API_PARAMS,
@@ -40,7 +40,6 @@ __all__ = [
40
40
  'ANTHROPIC_API_PARAMS',
41
41
  'GROQ_API_PARAMS',
42
42
  'GroqConfig',
43
- 'OpenSourceConfig',
44
43
  'LiteLLMConfig',
45
44
  'LITELLM_API_PARAMS',
46
45
  'OllamaConfig',
@@ -15,9 +15,8 @@ from __future__ import annotations
15
15
 
16
16
  from typing import List, Union
17
17
 
18
- from anthropic import NOT_GIVEN, NotGiven
19
-
20
18
  from camel.configs.base_config import BaseConfig
19
+ from camel.types import NOT_GIVEN, NotGiven
21
20
 
22
21
 
23
22
  class AnthropicConfig(BaseConfig):
@@ -55,9 +54,7 @@ class AnthropicConfig(BaseConfig):
55
54
  (default: :obj:`5`)
56
55
  metadata: An object describing metadata about the request.
57
56
  stream (bool, optional): Whether to incrementally stream the response
58
- using server-sent events.
59
- (default: :obj:`False`)
60
-
57
+ using server-sent events. (default: :obj:`False`)
61
58
  """
62
59
 
63
60
  max_tokens: int = 256
@@ -39,13 +39,13 @@ class BaseConfig(ABC, BaseModel):
39
39
  @classmethod
40
40
  def fields_type_checking(cls, tools):
41
41
  if tools is not None:
42
- from camel.toolkits import OpenAIFunction
42
+ from camel.toolkits import FunctionTool
43
43
 
44
44
  for tool in tools:
45
- if not isinstance(tool, OpenAIFunction):
45
+ if not isinstance(tool, FunctionTool):
46
46
  raise ValueError(
47
47
  f"The tool {tool} should "
48
- "be an instance of `OpenAIFunction`."
48
+ "be an instance of `FunctionTool`."
49
49
  )
50
50
  return tools
51
51
 
@@ -54,14 +54,14 @@ class BaseConfig(ABC, BaseModel):
54
54
 
55
55
  tools_schema = None
56
56
  if self.tools:
57
- from camel.toolkits import OpenAIFunction
57
+ from camel.toolkits import FunctionTool
58
58
 
59
59
  tools_schema = []
60
60
  for tool in self.tools:
61
- if not isinstance(tool, OpenAIFunction):
61
+ if not isinstance(tool, FunctionTool):
62
62
  raise ValueError(
63
63
  f"The tool {tool} should "
64
- "be an instance of `OpenAIFunction`."
64
+ "be an instance of `FunctionTool`."
65
65
  )
66
66
  tools_schema.append(tool.get_openai_tool_schema())
67
67
  config_dict["tools"] = tools_schema
@@ -86,7 +86,7 @@ class GeminiConfig(BaseConfig):
86
86
 
87
87
  @model_validator(mode="before")
88
88
  @classmethod
89
- def fields_type_checking(cls, data: Any):
89
+ def model_type_checking(cls, data: Any):
90
90
  if isinstance(data, dict):
91
91
  response_schema = data.get("response_schema")
92
92
  safety_settings = data.get("safety_settings")
@@ -15,9 +15,8 @@ from __future__ import annotations
15
15
 
16
16
  from typing import Optional, Sequence, Union
17
17
 
18
- from openai._types import NOT_GIVEN, NotGiven
19
-
20
18
  from camel.configs.base_config import BaseConfig
19
+ from camel.types import NOT_GIVEN, NotGiven
21
20
 
22
21
 
23
22
  class GroqConfig(BaseConfig):
@@ -73,7 +72,7 @@ class GroqConfig(BaseConfig):
73
72
  user (str, optional): A unique identifier representing your end-user,
74
73
  which can help OpenAI to monitor and detect abuse.
75
74
  (default: :obj:`""`)
76
- tools (list[OpenAIFunction], optional): A list of tools the model may
75
+ tools (list[FunctionTool], optional): A list of tools the model may
77
76
  call. Currently, only functions are supported as a tool. Use this
78
77
  to provide a list of functions the model may generate JSON inputs
79
78
  for. A max of 128 functions are supported.
@@ -15,9 +15,8 @@ from __future__ import annotations
15
15
 
16
16
  from typing import Sequence, Union
17
17
 
18
- from openai._types import NOT_GIVEN, NotGiven
19
-
20
18
  from camel.configs.base_config import BaseConfig
19
+ from camel.types import NOT_GIVEN, NotGiven
21
20
 
22
21
 
23
22
  class OllamaConfig(BaseConfig):
@@ -15,10 +15,10 @@ from __future__ import annotations
15
15
 
16
16
  from typing import Optional, Sequence, Union
17
17
 
18
- from openai._types import NOT_GIVEN, NotGiven
19
18
  from pydantic import Field
20
19
 
21
20
  from camel.configs.base_config import BaseConfig
21
+ from camel.types import NOT_GIVEN, NotGiven
22
22
 
23
23
 
24
24
  class ChatGPTConfig(BaseConfig):
@@ -81,7 +81,7 @@ class ChatGPTConfig(BaseConfig):
81
81
  user (str, optional): A unique identifier representing your end-user,
82
82
  which can help OpenAI to monitor and detect abuse.
83
83
  (default: :obj:`""`)
84
- tools (list[OpenAIFunction], optional): A list of tools the model may
84
+ tools (list[FunctionTool], optional): A list of tools the model may
85
85
  call. Currently, only functions are supported as a tool. Use this
86
86
  to provide a list of functions the model may generate JSON inputs
87
87
  for. A max of 128 functions are supported.
@@ -112,24 +112,3 @@ class ChatGPTConfig(BaseConfig):
112
112
 
113
113
 
114
114
  OPENAI_API_PARAMS = {param for param in ChatGPTConfig.model_fields.keys()}
115
-
116
-
117
- class OpenSourceConfig(BaseConfig):
118
- r"""Defines parameters for setting up open-source models and includes
119
- parameters to be passed to chat completion function of OpenAI API.
120
-
121
- Args:
122
- model_path (str): The path to a local folder containing the model
123
- files or the model card in HuggingFace hub.
124
- server_url (str): The URL to the server running the model inference
125
- which will be used as the API base of OpenAI API.
126
- api_params (ChatGPTConfig): An instance of :obj:ChatGPTConfig to
127
- contain the arguments to be passed to OpenAI API.
128
- """
129
-
130
- # Maybe the param needs to be renamed.
131
- # Warning: Field "model_path" has conflict with protected namespace
132
- # "model_".
133
- model_path: str
134
- server_url: str
135
- api_params: ChatGPTConfig = Field(default_factory=ChatGPTConfig)
@@ -15,10 +15,10 @@ from __future__ import annotations
15
15
 
16
16
  from typing import Any, Dict, Optional, Sequence, Union
17
17
 
18
- from openai._types import NOT_GIVEN, NotGiven
19
18
  from pydantic import Field
20
19
 
21
20
  from camel.configs.base_config import BaseConfig
21
+ from camel.types import NOT_GIVEN, NotGiven
22
22
 
23
23
 
24
24
  class SambaFastAPIConfig(BaseConfig):
@@ -172,7 +172,7 @@ class SambaCloudAPIConfig(BaseConfig):
172
172
  user (str, optional): A unique identifier representing your end-user,
173
173
  which can help OpenAI to monitor and detect abuse.
174
174
  (default: :obj:`""`)
175
- tools (list[OpenAIFunction], optional): A list of tools the model may
175
+ tools (list[FunctionTool], optional): A list of tools the model may
176
176
  call. Currently, only functions are supported as a tool. Use this
177
177
  to provide a list of functions the model may generate JSON inputs
178
178
  for. A max of 128 functions are supported.
@@ -15,10 +15,10 @@ from __future__ import annotations
15
15
 
16
16
  from typing import Any, Sequence, Union
17
17
 
18
- from openai._types import NOT_GIVEN, NotGiven
19
18
  from pydantic import Field
20
19
 
21
20
  from camel.configs.base_config import BaseConfig
21
+ from camel.types import NOT_GIVEN, NotGiven
22
22
 
23
23
 
24
24
  class TogetherAIConfig(BaseConfig):
@@ -15,10 +15,10 @@ from __future__ import annotations
15
15
 
16
16
  from typing import Sequence, Union
17
17
 
18
- from openai._types import NOT_GIVEN, NotGiven
19
18
  from pydantic import Field
20
19
 
21
20
  from camel.configs.base_config import BaseConfig
21
+ from camel.types import NOT_GIVEN, NotGiven
22
22
 
23
23
 
24
24
  # flake8: noqa: E501
@@ -15,9 +15,8 @@ from __future__ import annotations
15
15
 
16
16
  from typing import Optional, Sequence, Union
17
17
 
18
- from openai._types import NOT_GIVEN, NotGiven
19
-
20
18
  from camel.configs.base_config import BaseConfig
19
+ from camel.types import NOT_GIVEN, NotGiven
21
20
 
22
21
 
23
22
  class ZhipuAIConfig(BaseConfig):
@@ -45,7 +44,7 @@ class ZhipuAIConfig(BaseConfig):
45
44
  in the chat completion. The total length of input tokens and
46
45
  generated tokens is limited by the model's context length.
47
46
  (default: :obj:`None`)
48
- tools (list[OpenAIFunction], optional): A list of tools the model may
47
+ tools (list[FunctionTool], optional): A list of tools the model may
49
48
  call. Currently, only functions are supported as a tool. Use this
50
49
  to provide a list of functions the model may generate JSON inputs
51
50
  for. A max of 128 functions are supported.
@@ -16,10 +16,10 @@ from __future__ import annotations
16
16
  import os
17
17
  from typing import Any
18
18
 
19
- from openai import NOT_GIVEN, NotGiven, OpenAI
19
+ from openai import OpenAI
20
20
 
21
21
  from camel.embeddings.base import BaseEmbedding
22
- from camel.types import EmbeddingModelType
22
+ from camel.types import NOT_GIVEN, EmbeddingModelType, NotGiven
23
23
  from camel.utils import api_keys_required
24
24
 
25
25
 
camel/loaders/__init__.py CHANGED
@@ -13,6 +13,7 @@
13
13
  # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
14
 
15
15
  from .base_io import File
16
+ from .chunkr_reader import ChunkrReader
16
17
  from .firecrawl_reader import Firecrawl
17
18
  from .jina_url_reader import JinaURLReader
18
19
  from .unstructured_io import UnstructuredIO
@@ -22,4 +23,5 @@ __all__ = [
22
23
  'UnstructuredIO',
23
24
  'JinaURLReader',
24
25
  'Firecrawl',
26
+ 'ChunkrReader',
25
27
  ]
@@ -0,0 +1,163 @@
1
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
2
+ # Licensed under the Apache License, Version 2.0 (the “License”);
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an “AS IS” BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
14
+
15
+ import json
16
+ import logging
17
+ import os
18
+ import time
19
+ from typing import IO, Any, Optional, Union
20
+
21
+ import requests
22
+
23
+ from camel.utils import api_keys_required
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ class ChunkrReader:
29
+ r"""Chunkr Reader for processing documents and returning content
30
+ in various formats.
31
+
32
+ Args:
33
+ api_key (Optional[str], optional): The API key for Chunkr API. If not
34
+ provided, it will be retrieved from the environment variable
35
+ `CHUNKR_API_KEY`. (default: :obj:`None`)
36
+ url (Optional[str], optional): The url to the Chunkr service.
37
+ (default: :obj:`https://api.chunkr.ai/api/v1/task`)
38
+ timeout (int, optional): The maximum time in seconds to wait for the
39
+ API responses. (default: :obj:`30`)
40
+ **kwargs (Any): Additional keyword arguments for request headers.
41
+ """
42
+
43
+ @api_keys_required("CHUNKR_API_KEY")
44
+ def __init__(
45
+ self,
46
+ api_key: Optional[str] = None,
47
+ url: Optional[str] = "https://api.chunkr.ai/api/v1/task",
48
+ timeout: int = 30,
49
+ **kwargs: Any,
50
+ ) -> None:
51
+ self._api_key = api_key or os.getenv('CHUNKR_API_KEY')
52
+ self._url = os.getenv('CHUNKR_API_URL') or url
53
+ self._headers = {
54
+ "Authorization": f"{self._api_key}",
55
+ **kwargs,
56
+ }
57
+ self.timeout = timeout
58
+
59
+ def submit_task(
60
+ self,
61
+ file_path: str,
62
+ model: str = "Fast",
63
+ ocr_strategy: str = "Auto",
64
+ target_chunk_length: str = "512",
65
+ ) -> str:
66
+ r"""Submits a file to the Chunkr API and returns the task ID.
67
+
68
+ Args:
69
+ file_path (str): The path to the file to be uploaded.
70
+ model (str, optional): The model to be used for the task.
71
+ (default: :obj:`Fast`)
72
+ ocr_strategy (str, optional): The OCR strategy. Defaults to 'Auto'.
73
+ target_chunk_length (str, optional): The target chunk length.
74
+ (default: :obj:`512`)
75
+
76
+ Returns:
77
+ str: The task ID.
78
+ """
79
+ with open(file_path, 'rb') as file:
80
+ files: dict[
81
+ str, Union[tuple[None, IO[bytes]], tuple[None, str]]
82
+ ] = {
83
+ 'file': (
84
+ None,
85
+ file,
86
+ ), # Properly pass the file as a binary stream
87
+ 'model': (None, model),
88
+ 'ocr_strategy': (None, ocr_strategy),
89
+ 'target_chunk_length': (None, target_chunk_length),
90
+ }
91
+ try:
92
+ response = requests.post(
93
+ self._url, # type: ignore[arg-type]
94
+ headers=self._headers,
95
+ files=files,
96
+ timeout=self.timeout,
97
+ )
98
+ response.raise_for_status()
99
+ task_id = response.json().get('task_id')
100
+ if not task_id:
101
+ raise ValueError("Task ID not returned in the response.")
102
+ logger.info(f"Task submitted successfully. Task ID: {task_id}")
103
+ return task_id
104
+ except Exception as e:
105
+ logger.error(f"Failed to submit task: {e}")
106
+ raise ValueError(f"Failed to submit task: {e}") from e
107
+
108
+ def get_task_output(self, task_id: str, max_retries: int = 5) -> str:
109
+ r"""Polls the Chunkr API to check the task status and returns the task
110
+ result.
111
+
112
+ Args:
113
+ task_id (str): The task ID to check the status for.
114
+ max_retries (int, optional): Maximum number of retry attempts.
115
+ (default: :obj:`5`)
116
+
117
+ Returns:
118
+ str: The formatted task result in JSON format.
119
+
120
+ Raises:
121
+ ValueError: If the task status cannot be retrieved.
122
+ RuntimeError: If the maximum number of retries is reached without
123
+ a successful task completion.
124
+ """
125
+ url_get = f"{self._url}/{task_id}"
126
+ attempts = 0
127
+
128
+ while attempts < max_retries:
129
+ try:
130
+ response = requests.get(
131
+ url_get, headers=self._headers, timeout=self.timeout
132
+ )
133
+ response.raise_for_status()
134
+ task_status = response.json().get('status')
135
+
136
+ if task_status == "Succeeded":
137
+ logger.info(f"Task {task_id} completed successfully.")
138
+ return self._pretty_print_response(response.json())
139
+ else:
140
+ logger.info(
141
+ f"Task {task_id} is still {task_status}. Retrying "
142
+ "in 5 seconds..."
143
+ )
144
+ except Exception as e:
145
+ logger.error(f"Failed to retrieve task status: {e}")
146
+ raise ValueError(f"Failed to retrieve task status: {e}") from e
147
+
148
+ attempts += 1
149
+ time.sleep(5)
150
+
151
+ logger.error(f"Max retries reached for task {task_id}.")
152
+ raise RuntimeError(f"Max retries reached for task {task_id}.")
153
+
154
+ def _pretty_print_response(self, response_json: dict) -> str:
155
+ r"""Pretty prints the JSON response.
156
+
157
+ Args:
158
+ response_json (dict): The response JSON to pretty print.
159
+
160
+ Returns:
161
+ str: Formatted JSON as a string.
162
+ """
163
+ return json.dumps(response_json, indent=4)
@@ -49,7 +49,6 @@ class Firecrawl:
49
49
  self,
50
50
  url: str,
51
51
  params: Optional[Dict[str, Any]] = None,
52
- wait_until_done: bool = True,
53
52
  **kwargs: Any,
54
53
  ) -> Any:
55
54
  r"""Crawl a URL and all accessible subpages. Customize the crawl by
@@ -60,14 +59,12 @@ class Firecrawl:
60
59
  url (str): The URL to crawl.
61
60
  params (Optional[Dict[str, Any]]): Additional parameters for the
62
61
  crawl request. Defaults to `None`.
63
- wait_until_done (bool): Whether to wait until the crawl job is
64
- completed. Defaults to `True`.
65
62
  **kwargs (Any): Additional keyword arguments, such as
66
- `poll_interval`, `idempotency_key`, etc.
63
+ `poll_interval`, `idempotency_key`.
67
64
 
68
65
  Returns:
69
- Any: The list content of the URL if `wait_until_done` is True;
70
- otherwise, a string job ID.
66
+ Any: The crawl job ID or the crawl results if waiting until
67
+ completion.
71
68
 
72
69
  Raises:
73
70
  RuntimeError: If the crawling process fails.
@@ -78,13 +75,8 @@ class Firecrawl:
78
75
  url=url,
79
76
  params=params,
80
77
  **kwargs,
81
- wait_until_done=wait_until_done,
82
- )
83
- return (
84
- crawl_response
85
- if wait_until_done
86
- else crawl_response.get("jobId")
87
78
  )
79
+ return crawl_response
88
80
  except Exception as e:
89
81
  raise RuntimeError(f"Failed to crawl the URL: {e}")
90
82
 
@@ -103,7 +95,10 @@ class Firecrawl:
103
95
  """
104
96
 
105
97
  try:
106
- crawl_result = self.app.crawl_url(url=url)
98
+ crawl_result = self.app.crawl_url(
99
+ url,
100
+ {'formats': ['markdown']},
101
+ )
107
102
  if not isinstance(crawl_result, list):
108
103
  raise ValueError("Unexpected response format")
109
104
  markdown_contents = [
@@ -160,12 +155,12 @@ class Firecrawl:
160
155
  except Exception as e:
161
156
  raise RuntimeError(f"Failed to scrape the URL: {e}")
162
157
 
163
- def structured_scrape(self, url: str, output_schema: BaseModel) -> Dict:
158
+ def structured_scrape(self, url: str, response_format: BaseModel) -> Dict:
164
159
  r"""Use LLM to extract structured data from given URL.
165
160
 
166
161
  Args:
167
162
  url (str): The URL to read.
168
- output_schema (BaseModel): A pydantic model
163
+ response_format (BaseModel): A pydantic model
169
164
  that includes value types and field descriptions used to
170
165
  generate a structured response by LLM. This schema helps
171
166
  in defining the expected output format.
@@ -180,41 +175,14 @@ class Firecrawl:
180
175
  data = self.app.scrape_url(
181
176
  url,
182
177
  {
183
- 'extractorOptions': {
184
- "mode": "llm-extraction",
185
- "extractionPrompt": "Based on the information on "
186
- "the page, extract the information from the schema.",
187
- 'extractionSchema': output_schema.model_json_schema(),
188
- },
189
- 'pageOptions': {'onlyMainContent': True},
178
+ 'formats': ['extract'],
179
+ 'extract': {'schema': response_format.model_json_schema()},
190
180
  },
191
181
  )
192
- return data.get("llm_extraction", {})
182
+ return data.get("extract", {})
193
183
  except Exception as e:
194
184
  raise RuntimeError(f"Failed to perform structured scrape: {e}")
195
185
 
196
- def tidy_scrape(self, url: str) -> str:
197
- r"""Only return the main content of the page, excluding headers,
198
- navigation bars, footers, etc. in Markdown format.
199
-
200
- Args:
201
- url (str): The URL to read.
202
-
203
- Returns:
204
- str: The markdown content of the URL.
205
-
206
- Raises:
207
- RuntimeError: If the scrape process fails.
208
- """
209
-
210
- try:
211
- scrape_result = self.app.scrape_url(
212
- url, {'pageOptions': {'onlyMainContent': True}}
213
- )
214
- return scrape_result.get("markdown", "")
215
- except Exception as e:
216
- raise RuntimeError(f"Failed to perform tidy scrape: {e}")
217
-
218
186
  def map_site(
219
187
  self, url: str, params: Optional[Dict[str, Any]] = None
220
188
  ) -> list: