camel-ai 0.2.20a1__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +2 -3
- camel/agents/knowledge_graph_agent.py +1 -5
- camel/benchmarks/apibench.py +1 -5
- camel/benchmarks/nexus.py +1 -5
- camel/benchmarks/ragbench.py +2 -2
- camel/bots/telegram_bot.py +1 -5
- camel/configs/__init__.py +3 -0
- camel/configs/aiml_config.py +80 -0
- camel/datagen/__init__.py +3 -1
- camel/datagen/self_improving_cot.py +821 -0
- camel/datagen/self_instruct/self_instruct.py +1 -1
- camel/embeddings/openai_embedding.py +10 -1
- camel/interpreters/docker/Dockerfile +12 -0
- camel/interpreters/docker_interpreter.py +19 -1
- camel/interpreters/subprocess_interpreter.py +97 -6
- camel/loaders/__init__.py +2 -0
- camel/loaders/mineru_extractor.py +250 -0
- camel/models/__init__.py +2 -0
- camel/models/aiml_model.py +147 -0
- camel/models/base_model.py +54 -1
- camel/models/deepseek_model.py +0 -18
- camel/models/model_factory.py +3 -0
- camel/models/siliconflow_model.py +1 -1
- camel/societies/workforce/role_playing_worker.py +2 -4
- camel/societies/workforce/single_agent_worker.py +1 -6
- camel/societies/workforce/workforce.py +3 -9
- camel/toolkits/__init__.py +5 -0
- camel/toolkits/mineru_toolkit.py +178 -0
- camel/toolkits/reddit_toolkit.py +8 -38
- camel/toolkits/sympy_toolkit.py +816 -0
- camel/toolkits/whatsapp_toolkit.py +11 -32
- camel/types/enums.py +25 -1
- camel/utils/__init__.py +7 -2
- camel/utils/commons.py +198 -21
- camel/utils/deduplication.py +232 -0
- camel/utils/token_counting.py +0 -38
- {camel_ai-0.2.20a1.dist-info → camel_ai-0.2.22.dist-info}/METADATA +10 -13
- {camel_ai-0.2.20a1.dist-info → camel_ai-0.2.22.dist-info}/RECORD +42 -34
- {camel_ai-0.2.20a1.dist-info → camel_ai-0.2.22.dist-info}/WHEEL +1 -1
- /camel/datagen/{cotdatagen.py → cot_datagen.py} +0 -0
- {camel_ai-0.2.20a1.dist-info → camel_ai-0.2.22.dist-info}/LICENSE +0 -0
camel/models/base_model.py
CHANGED
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
import abc
|
|
15
|
+
import re
|
|
14
16
|
from abc import ABC, abstractmethod
|
|
15
17
|
from typing import Any, Dict, List, Optional, Union
|
|
16
18
|
|
|
@@ -27,7 +29,30 @@ from camel.types import (
|
|
|
27
29
|
from camel.utils import BaseTokenCounter
|
|
28
30
|
|
|
29
31
|
|
|
30
|
-
class BaseModelBackend(ABC):
|
|
32
|
+
class ModelBackendMeta(abc.ABCMeta):
|
|
33
|
+
r"""Metaclass that automatically preprocesses messages in run method.
|
|
34
|
+
|
|
35
|
+
Automatically wraps the run method of any class inheriting from
|
|
36
|
+
BaseModelBackend to preprocess messages (remove <think> tags) before they
|
|
37
|
+
are sent to the model.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def __new__(mcs, name, bases, namespace):
|
|
41
|
+
r"""Wraps run method with preprocessing if it exists in the class."""
|
|
42
|
+
if 'run' in namespace:
|
|
43
|
+
original_run = namespace['run']
|
|
44
|
+
|
|
45
|
+
def wrapped_run(
|
|
46
|
+
self, messages: List[OpenAIMessage], *args, **kwargs
|
|
47
|
+
):
|
|
48
|
+
messages = self.preprocess_messages(messages)
|
|
49
|
+
return original_run(self, messages, *args, **kwargs)
|
|
50
|
+
|
|
51
|
+
namespace['run'] = wrapped_run
|
|
52
|
+
return super().__new__(mcs, name, bases, namespace)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class BaseModelBackend(ABC, metaclass=ModelBackendMeta):
|
|
31
56
|
r"""Base class for different model backends.
|
|
32
57
|
It may be OpenAI API, a local LLM, a stub for unit tests, etc.
|
|
33
58
|
|
|
@@ -73,6 +98,34 @@ class BaseModelBackend(ABC):
|
|
|
73
98
|
"""
|
|
74
99
|
pass
|
|
75
100
|
|
|
101
|
+
def preprocess_messages(
|
|
102
|
+
self, messages: List[OpenAIMessage]
|
|
103
|
+
) -> List[OpenAIMessage]:
|
|
104
|
+
r"""Preprocess messages before sending to model API.
|
|
105
|
+
Removes thinking content and other model-specific preprocessing.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
messages (List[OpenAIMessage]): Original messages
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
List[OpenAIMessage]: Preprocessed messages
|
|
112
|
+
"""
|
|
113
|
+
# Remove thinking content from messages before sending to API
|
|
114
|
+
# This ensures only the final response is sent, excluding
|
|
115
|
+
# intermediate thought processes
|
|
116
|
+
return [
|
|
117
|
+
{ # type: ignore[misc]
|
|
118
|
+
**msg,
|
|
119
|
+
'content': re.sub(
|
|
120
|
+
r'<think>.*?</think>',
|
|
121
|
+
'',
|
|
122
|
+
msg['content'], # type: ignore[arg-type]
|
|
123
|
+
flags=re.DOTALL,
|
|
124
|
+
).strip(),
|
|
125
|
+
}
|
|
126
|
+
for msg in messages
|
|
127
|
+
]
|
|
128
|
+
|
|
76
129
|
@abstractmethod
|
|
77
130
|
def run(
|
|
78
131
|
self,
|
camel/models/deepseek_model.py
CHANGED
|
@@ -118,8 +118,6 @@ class DeepSeekModel(BaseModelBackend):
|
|
|
118
118
|
if self.model_type in [
|
|
119
119
|
ModelType.DEEPSEEK_REASONER,
|
|
120
120
|
]:
|
|
121
|
-
import re
|
|
122
|
-
|
|
123
121
|
logger.warning(
|
|
124
122
|
"You are using a DeepSeek Reasoner model, "
|
|
125
123
|
"which has certain limitations, reference: "
|
|
@@ -141,22 +139,6 @@ class DeepSeekModel(BaseModelBackend):
|
|
|
141
139
|
if key in self.model_config_dict:
|
|
142
140
|
del self.model_config_dict[key]
|
|
143
141
|
|
|
144
|
-
# Remove thinking content from messages before sending to API
|
|
145
|
-
# This ensures only the final response is sent, excluding
|
|
146
|
-
# intermediate thought processes
|
|
147
|
-
messages = [
|
|
148
|
-
{ # type: ignore[misc]
|
|
149
|
-
**msg,
|
|
150
|
-
'content': re.sub(
|
|
151
|
-
r'<think>.*?</think>',
|
|
152
|
-
'',
|
|
153
|
-
msg['content'], # type: ignore[arg-type]
|
|
154
|
-
flags=re.DOTALL,
|
|
155
|
-
).strip(),
|
|
156
|
-
}
|
|
157
|
-
for msg in messages
|
|
158
|
-
]
|
|
159
|
-
|
|
160
142
|
response = self._client.chat.completions.create(
|
|
161
143
|
messages=messages,
|
|
162
144
|
model=self.model_type,
|
camel/models/model_factory.py
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
from typing import Dict, Optional, Type, Union
|
|
15
15
|
|
|
16
|
+
from camel.models.aiml_model import AIMLModel
|
|
16
17
|
from camel.models.anthropic_model import AnthropicModel
|
|
17
18
|
from camel.models.azure_openai_model import AzureOpenAIModel
|
|
18
19
|
from camel.models.base_model import BaseModelBackend
|
|
@@ -104,6 +105,8 @@ class ModelFactory:
|
|
|
104
105
|
model_class = NvidiaModel
|
|
105
106
|
elif model_platform.is_siliconflow:
|
|
106
107
|
model_class = SiliconFlowModel
|
|
108
|
+
elif model_platform.is_aiml:
|
|
109
|
+
model_class = AIMLModel
|
|
107
110
|
|
|
108
111
|
elif model_platform.is_openai and model_type.is_openai:
|
|
109
112
|
model_class = OpenAIModel
|
|
camel/models/siliconflow_model.py
CHANGED
|
@@ -18,7 +18,7 @@ from openai import OpenAI, Stream
|
|
|
18
18
|
|
|
19
19
|
from camel.configs import SILICONFLOW_API_PARAMS, SiliconFlowConfig
|
|
20
20
|
from camel.messages import OpenAIMessage
|
|
21
|
-
from camel.models import BaseModelBackend
|
|
21
|
+
from camel.models.base_model import BaseModelBackend
|
|
22
22
|
from camel.types import (
|
|
23
23
|
ChatCompletion,
|
|
24
24
|
ChatCompletionChunk,
|
|
camel/societies/workforce/role_playing_worker.py
CHANGED
|
@@ -168,11 +168,9 @@ class RolePlayingWorker(Worker):
|
|
|
168
168
|
chat_history=chat_history_str,
|
|
169
169
|
additional_info=task.additional_info,
|
|
170
170
|
)
|
|
171
|
-
req = BaseMessage.make_user_message(
|
|
172
|
-
role_name="User",
|
|
173
|
-
content=prompt,
|
|
171
|
+
response = self.summarize_agent.step(
|
|
172
|
+
prompt, response_format=TaskResult
|
|
174
173
|
)
|
|
175
|
-
response = self.summarize_agent.step(req, response_format=TaskResult)
|
|
176
174
|
result_dict = json.loads(response.msg.content)
|
|
177
175
|
task_result = TaskResult(**result_dict)
|
|
178
176
|
task.result = task_result.content
|
|
camel/societies/workforce/single_agent_worker.py
CHANGED
|
@@ -19,7 +19,6 @@ from typing import Any, List
|
|
|
19
19
|
from colorama import Fore
|
|
20
20
|
|
|
21
21
|
from camel.agents import ChatAgent
|
|
22
|
-
from camel.messages.base import BaseMessage
|
|
23
22
|
from camel.societies.workforce.prompts import PROCESS_TASK_PROMPT
|
|
24
23
|
from camel.societies.workforce.utils import TaskResult
|
|
25
24
|
from camel.societies.workforce.worker import Worker
|
|
@@ -72,12 +71,8 @@ class SingleAgentWorker(Worker):
|
|
|
72
71
|
dependency_tasks_info=dependency_tasks_info,
|
|
73
72
|
additional_info=task.additional_info,
|
|
74
73
|
)
|
|
75
|
-
req = BaseMessage.make_user_message(
|
|
76
|
-
role_name="User",
|
|
77
|
-
content=prompt,
|
|
78
|
-
)
|
|
79
74
|
try:
|
|
80
|
-
response = self.worker.step(req, response_format=TaskResult)
|
|
75
|
+
response = self.worker.step(prompt, response_format=TaskResult)
|
|
81
76
|
except Exception as e:
|
|
82
77
|
print(
|
|
83
78
|
f"{Fore.RED}Error occurred while processing task {task.id}:"
|
|
camel/societies/workforce/workforce.py
CHANGED
|
@@ -281,13 +281,9 @@ class Workforce(BaseNode):
|
|
|
281
281
|
child_nodes_info=self._get_child_nodes_info(),
|
|
282
282
|
additional_info=task.additional_info,
|
|
283
283
|
)
|
|
284
|
-
req = BaseMessage.make_user_message(
|
|
285
|
-
role_name="User",
|
|
286
|
-
content=prompt,
|
|
287
|
-
)
|
|
288
284
|
|
|
289
285
|
response = self.coordinator_agent.step(
|
|
290
|
-
req, response_format=TaskAssignResult
|
|
286
|
+
prompt, response_format=TaskAssignResult
|
|
291
287
|
)
|
|
292
288
|
result_dict = json.loads(response.msg.content)
|
|
293
289
|
task_assign_result = TaskAssignResult(**result_dict)
|
|
@@ -315,11 +311,9 @@ class Workforce(BaseNode):
|
|
|
315
311
|
child_nodes_info=self._get_child_nodes_info(),
|
|
316
312
|
additional_info=task.additional_info,
|
|
317
313
|
)
|
|
318
|
-
req = BaseMessage.make_user_message(
|
|
319
|
-
role_name="User",
|
|
320
|
-
content=prompt,
|
|
314
|
+
response = self.coordinator_agent.step(
|
|
315
|
+
prompt, response_format=WorkerConf
|
|
321
316
|
)
|
|
322
|
-
response = self.coordinator_agent.step(req, response_format=WorkerConf)
|
|
323
317
|
result_dict = json.loads(response.msg.content)
|
|
324
318
|
new_node_conf = WorkerConf(**result_dict)
|
|
325
319
|
|
camel/toolkits/__init__.py
CHANGED
|
@@ -46,6 +46,9 @@ from .stripe_toolkit import StripeToolkit
|
|
|
46
46
|
from .video_toolkit import VideoDownloaderToolkit
|
|
47
47
|
from .dappier_toolkit import DappierToolkit
|
|
48
48
|
from .semantic_scholar_toolkit import SemanticScholarToolkit
|
|
49
|
+
from .sympy_toolkit import SymPyToolkit
|
|
50
|
+
from .mineru_toolkit import MinerUToolkit
|
|
51
|
+
|
|
49
52
|
|
|
50
53
|
__all__ = [
|
|
51
54
|
'BaseToolkit',
|
|
@@ -79,4 +82,6 @@ __all__ = [
|
|
|
79
82
|
'OpenBBToolkit',
|
|
80
83
|
'DappierToolkit',
|
|
81
84
|
'SemanticScholarToolkit',
|
|
85
|
+
'SymPyToolkit',
|
|
86
|
+
'MinerUToolkit',
|
|
82
87
|
]
|
|
camel/toolkits/mineru_toolkit.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
from typing import Dict, List, Optional
|
|
16
|
+
|
|
17
|
+
from camel.loaders.mineru_extractor import MinerU
|
|
18
|
+
from camel.toolkits.base import BaseToolkit
|
|
19
|
+
from camel.toolkits.function_tool import FunctionTool
|
|
20
|
+
from camel.utils import api_keys_required
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class MinerUToolkit(BaseToolkit):
|
|
24
|
+
r"""Toolkit for extracting and processing document content
|
|
25
|
+
using MinerU API.
|
|
26
|
+
|
|
27
|
+
Provides comprehensive document processing capabilities including content
|
|
28
|
+
extraction from URLs and files, with support for OCR, formula recognition,
|
|
29
|
+
and table detection through the MinerU API service.
|
|
30
|
+
|
|
31
|
+
Note:
|
|
32
|
+
- Maximum file size: 200MB per file
|
|
33
|
+
- Maximum pages: 600 pages per file
|
|
34
|
+
- Daily quota: 2000 pages for high-priority parsing
|
|
35
|
+
- Network restrictions may affect certain URLs (e.g., GitHub, AWS)
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
@api_keys_required(
|
|
39
|
+
[
|
|
40
|
+
(None, "MINERU_API_KEY"),
|
|
41
|
+
]
|
|
42
|
+
)
|
|
43
|
+
def __init__(
|
|
44
|
+
self,
|
|
45
|
+
api_key: Optional[str] = None,
|
|
46
|
+
api_url: Optional[str] = "https://mineru.net/api/v4",
|
|
47
|
+
is_ocr: bool = False,
|
|
48
|
+
enable_formula: bool = False,
|
|
49
|
+
enable_table: bool = True,
|
|
50
|
+
layout_model: str = "doclayout_yolo",
|
|
51
|
+
language: str = "en",
|
|
52
|
+
wait: bool = True,
|
|
53
|
+
timeout: float = 300,
|
|
54
|
+
) -> None:
|
|
55
|
+
r"""Initialize the MinerU document processing toolkit.
|
|
56
|
+
|
|
57
|
+
Args:
|
|
58
|
+
api_key (Optional[str]): Authentication key for MinerU API access.
|
|
59
|
+
If not provided, uses MINERU_API_KEY environment variable.
|
|
60
|
+
(default: :obj:`None`)
|
|
61
|
+
api_url (Optional[str]): Base endpoint URL for MinerU API service.
|
|
62
|
+
(default: :obj:`"https://mineru.net/api/v4"`)
|
|
63
|
+
is_ocr (bool): Enable Optical Character Recognition for image-based
|
|
64
|
+
text extraction. (default: :obj:`False`)
|
|
65
|
+
enable_formula (bool): Enable mathematical formula detection and
|
|
66
|
+
recognition. (default: :obj:`False`)
|
|
67
|
+
enable_table (bool): Enable table structure detection and
|
|
68
|
+
extraction. (default: :obj:`True`)
|
|
69
|
+
layout_model (str): Document layout analysis model selection.
|
|
70
|
+
Available options: 'doclayout_yolo', 'layoutlmv3'.
|
|
71
|
+
(default: :obj:`"doclayout_yolo"`)
|
|
72
|
+
language (str): Primary language of the document for processing.
|
|
73
|
+
(default: :obj:`"en"`)
|
|
74
|
+
wait (bool): Block execution until processing completion.
|
|
75
|
+
(default: :obj:`True`)
|
|
76
|
+
timeout (float): Maximum duration in seconds to wait for task
|
|
77
|
+
completion. (default: :obj:`300`)
|
|
78
|
+
"""
|
|
79
|
+
self.client = MinerU(
|
|
80
|
+
api_key=api_key,
|
|
81
|
+
api_url=api_url,
|
|
82
|
+
is_ocr=is_ocr,
|
|
83
|
+
enable_formula=enable_formula,
|
|
84
|
+
enable_table=enable_table,
|
|
85
|
+
layout_model=layout_model,
|
|
86
|
+
language=language,
|
|
87
|
+
)
|
|
88
|
+
self.wait = wait
|
|
89
|
+
self.timeout = timeout
|
|
90
|
+
|
|
91
|
+
def extract_from_urls(
|
|
92
|
+
self,
|
|
93
|
+
urls: str | List[str],
|
|
94
|
+
) -> Dict:
|
|
95
|
+
r"""Process and extract content from one or multiple URLs.
|
|
96
|
+
|
|
97
|
+
Args:
|
|
98
|
+
urls (str | List[str]): Target URL or list of URLs for content
|
|
99
|
+
extraction. Supports both single URL string and multiple URLs
|
|
100
|
+
in a list.
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
Dict: Response containing either completed task results when wait
|
|
104
|
+
is True, or task/batch identifiers for status tracking when
|
|
105
|
+
wait is False.
|
|
106
|
+
"""
|
|
107
|
+
if isinstance(urls, str):
|
|
108
|
+
# Single URL case
|
|
109
|
+
response = self.client.extract_url(url=urls)
|
|
110
|
+
|
|
111
|
+
if self.wait:
|
|
112
|
+
return self.client.wait_for_completion(
|
|
113
|
+
response['task_id'],
|
|
114
|
+
timeout=self.timeout,
|
|
115
|
+
)
|
|
116
|
+
return response
|
|
117
|
+
else:
|
|
118
|
+
# Multiple URLs case
|
|
119
|
+
files: List[Dict[str, str | bool]] = [
|
|
120
|
+
{"url": str(url)} for url in urls
|
|
121
|
+
]
|
|
122
|
+
batch_id = self.client.batch_extract_urls(files=files)
|
|
123
|
+
|
|
124
|
+
if self.wait:
|
|
125
|
+
return self.client.wait_for_completion(
|
|
126
|
+
batch_id,
|
|
127
|
+
is_batch=True,
|
|
128
|
+
timeout=self.timeout if self.timeout > 300 else 600,
|
|
129
|
+
)
|
|
130
|
+
return {"batch_id": batch_id}
|
|
131
|
+
|
|
132
|
+
def get_task_status(self, task_id: str) -> Dict:
|
|
133
|
+
r"""Retrieve current status of an individual extraction task.
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
task_id (str): Unique identifier for the extraction task to check.
|
|
137
|
+
|
|
138
|
+
Returns:
|
|
139
|
+
Dict: Status information and results (if task is completed) for
|
|
140
|
+
the specified task.
|
|
141
|
+
|
|
142
|
+
Note:
|
|
143
|
+
This is a low-level status checking method. For most use cases,
|
|
144
|
+
prefer using extract_from_urls with wait=True for automatic
|
|
145
|
+
completion handling.
|
|
146
|
+
"""
|
|
147
|
+
return self.client.get_task_status(task_id)
|
|
148
|
+
|
|
149
|
+
def get_batch_status(self, batch_id: str) -> Dict:
|
|
150
|
+
r"""Retrieve current status of a batch extraction task.
|
|
151
|
+
|
|
152
|
+
Args:
|
|
153
|
+
batch_id (str): Unique identifier for the batch extraction task
|
|
154
|
+
to check.
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
Dict: Comprehensive status information and results for all files
|
|
158
|
+
in the batch task.
|
|
159
|
+
|
|
160
|
+
Note:
|
|
161
|
+
This is a low-level status checking method. For most use cases,
|
|
162
|
+
prefer using extract_from_urls (URL list) with wait=True for automatic
|
|
163
|
+
completion handling.
|
|
164
|
+
"""
|
|
165
|
+
return self.client.get_batch_status(batch_id)
|
|
166
|
+
|
|
167
|
+
def get_tools(self) -> List[FunctionTool]:
|
|
168
|
+
r"""Retrieve available toolkit functions as FunctionTool objects.
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
List[FunctionTool]: Collection of FunctionTool objects representing
|
|
172
|
+
the available document processing functions in this toolkit.
|
|
173
|
+
"""
|
|
174
|
+
return [
|
|
175
|
+
FunctionTool(self.extract_from_urls),
|
|
176
|
+
FunctionTool(self.get_task_status),
|
|
177
|
+
FunctionTool(self.get_batch_status),
|
|
178
|
+
]
|
camel/toolkits/reddit_toolkit.py
CHANGED
|
@@ -16,10 +16,9 @@ import os
|
|
|
16
16
|
import time
|
|
17
17
|
from typing import Any, Dict, List, Union
|
|
18
18
|
|
|
19
|
-
from requests.exceptions import RequestException
|
|
20
|
-
|
|
21
19
|
from camel.toolkits import FunctionTool
|
|
22
20
|
from camel.toolkits.base import BaseToolkit
|
|
21
|
+
from camel.utils import retry_on_error
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
class RedditToolkit(BaseToolkit):
|
|
@@ -61,30 +60,7 @@ class RedditToolkit(BaseToolkit):
|
|
|
61
60
|
request_timeout=30, # Set a timeout to handle delays
|
|
62
61
|
)
|
|
63
62
|
|
|
64
|
-
def _retry_request(self, func, *args, **kwargs):
|
|
65
|
-
r"""Retries a function in case of network-related errors.
|
|
66
|
-
|
|
67
|
-
Args:
|
|
68
|
-
func (callable): The function to be retried.
|
|
69
|
-
*args: Arguments to pass to the function.
|
|
70
|
-
**kwargs: Keyword arguments to pass to the function.
|
|
71
|
-
|
|
72
|
-
Returns:
|
|
73
|
-
Any: The result of the function call if successful.
|
|
74
|
-
|
|
75
|
-
Raises:
|
|
76
|
-
RequestException: If all retry attempts fail.
|
|
77
|
-
"""
|
|
78
|
-
for attempt in range(self.retries):
|
|
79
|
-
try:
|
|
80
|
-
return func(*args, **kwargs)
|
|
81
|
-
except RequestException as e:
|
|
82
|
-
print(f"Attempt {attempt + 1}/{self.retries} failed: {e}")
|
|
83
|
-
if attempt < self.retries - 1:
|
|
84
|
-
time.sleep(self.delay)
|
|
85
|
-
else:
|
|
86
|
-
raise
|
|
87
|
-
|
|
63
|
+
@retry_on_error()
|
|
88
64
|
def collect_top_posts(
|
|
89
65
|
self,
|
|
90
66
|
subreddit_name: str,
|
|
@@ -113,8 +89,8 @@ class RedditToolkit(BaseToolkit):
|
|
|
113
89
|
"Please set the environment variables."
|
|
114
90
|
)
|
|
115
91
|
|
|
116
|
-
subreddit = self._retry_request(self.reddit.subreddit, subreddit_name)
|
|
117
|
-
top_posts = self._retry_request(subreddit.top, limit=post_limit)
|
|
92
|
+
subreddit = self.reddit.subreddit(subreddit_name)
|
|
93
|
+
top_posts = subreddit.top(limit=post_limit)
|
|
118
94
|
data = []
|
|
119
95
|
|
|
120
96
|
for post in top_posts:
|
|
@@ -122,9 +98,7 @@ class RedditToolkit(BaseToolkit):
|
|
|
122
98
|
"Post Title": post.title,
|
|
123
99
|
"Comments": [
|
|
124
100
|
{"Comment Body": comment.body, "Upvotes": comment.score}
|
|
125
|
-
for comment in self._retry_request(
|
|
126
|
-
lambda post=post: list(post.comments)
|
|
127
|
-
)[:comment_limit]
|
|
101
|
+
for comment in list(post.comments)[:comment_limit]
|
|
128
102
|
],
|
|
129
103
|
}
|
|
130
104
|
data.append(post_data)
|
|
@@ -192,15 +166,11 @@ class RedditToolkit(BaseToolkit):
|
|
|
192
166
|
data = []
|
|
193
167
|
|
|
194
168
|
for subreddit_name in subreddits:
|
|
195
|
-
subreddit = self._retry_request(
|
|
196
|
-
self.reddit.subreddit, subreddit_name
|
|
197
|
-
)
|
|
198
|
-
top_posts = self._retry_request(subreddit.top, limit=post_limit)
|
|
169
|
+
subreddit = self.reddit.subreddit(subreddit_name)
|
|
170
|
+
top_posts = subreddit.top(limit=post_limit)
|
|
199
171
|
|
|
200
172
|
for post in top_posts:
|
|
201
|
-
for comment in self._retry_request(
|
|
202
|
-
lambda post=post: list(post.comments)
|
|
203
|
-
)[:comment_limit]:
|
|
173
|
+
for comment in list(post.comments)[:comment_limit]:
|
|
204
174
|
# Print comment body for debugging
|
|
205
175
|
if any(
|
|
206
176
|
keyword.lower() in comment.body.lower()
|