camel-ai 0.2.15a0__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of camel-ai has been flagged as potentially problematic.

Files changed (95)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +18 -4
  3. camel/agents/multi_hop_generator_agent.py +85 -0
  4. camel/agents/programmed_agent_instruction.py +148 -0
  5. camel/benchmarks/__init__.py +13 -1
  6. camel/benchmarks/apibank.py +565 -0
  7. camel/benchmarks/apibench.py +500 -0
  8. camel/benchmarks/gaia.py +4 -4
  9. camel/benchmarks/nexus.py +518 -0
  10. camel/benchmarks/ragbench.py +333 -0
  11. camel/bots/__init__.py +1 -1
  12. camel/bots/discord/__init__.py +26 -0
  13. camel/bots/discord/discord_app.py +384 -0
  14. camel/bots/discord/discord_installation.py +64 -0
  15. camel/bots/discord/discord_store.py +160 -0
  16. camel/configs/__init__.py +3 -0
  17. camel/configs/anthropic_config.py +17 -15
  18. camel/configs/internlm_config.py +60 -0
  19. camel/data_collector/base.py +5 -5
  20. camel/data_collector/sharegpt_collector.py +2 -2
  21. camel/datagen/__init__.py +6 -2
  22. camel/datagen/{o1datagen.py → cotdatagen.py} +19 -6
  23. camel/datagen/self_instruct/__init__.py +36 -0
  24. camel/datagen/self_instruct/filter/__init__.py +34 -0
  25. camel/datagen/self_instruct/filter/filter_function.py +216 -0
  26. camel/datagen/self_instruct/filter/filter_registry.py +56 -0
  27. camel/datagen/self_instruct/filter/instruction_filter.py +81 -0
  28. camel/datagen/self_instruct/self_instruct.py +393 -0
  29. camel/datagen/self_instruct/templates.py +382 -0
  30. camel/datahubs/huggingface.py +12 -2
  31. camel/datahubs/models.py +2 -3
  32. camel/embeddings/mistral_embedding.py +5 -1
  33. camel/embeddings/openai_compatible_embedding.py +6 -1
  34. camel/embeddings/openai_embedding.py +5 -1
  35. camel/interpreters/e2b_interpreter.py +5 -1
  36. camel/loaders/__init__.py +2 -0
  37. camel/loaders/apify_reader.py +5 -1
  38. camel/loaders/chunkr_reader.py +5 -1
  39. camel/loaders/firecrawl_reader.py +0 -30
  40. camel/loaders/panda_reader.py +337 -0
  41. camel/logger.py +11 -5
  42. camel/messages/__init__.py +10 -4
  43. camel/messages/conversion/conversation_models.py +5 -0
  44. camel/messages/func_message.py +30 -22
  45. camel/models/__init__.py +2 -0
  46. camel/models/anthropic_model.py +6 -23
  47. camel/models/azure_openai_model.py +1 -2
  48. camel/models/cohere_model.py +13 -1
  49. camel/models/deepseek_model.py +5 -1
  50. camel/models/gemini_model.py +15 -2
  51. camel/models/groq_model.py +5 -1
  52. camel/models/internlm_model.py +143 -0
  53. camel/models/mistral_model.py +19 -8
  54. camel/models/model_factory.py +3 -0
  55. camel/models/nemotron_model.py +5 -1
  56. camel/models/nvidia_model.py +5 -1
  57. camel/models/openai_model.py +5 -1
  58. camel/models/qwen_model.py +5 -1
  59. camel/models/reka_model.py +5 -1
  60. camel/models/reward/__init__.py +2 -0
  61. camel/models/reward/nemotron_model.py +5 -1
  62. camel/models/reward/skywork_model.py +88 -0
  63. camel/models/samba_model.py +5 -1
  64. camel/models/togetherai_model.py +5 -1
  65. camel/models/yi_model.py +5 -1
  66. camel/models/zhipuai_model.py +5 -1
  67. camel/schemas/openai_converter.py +5 -1
  68. camel/storages/graph_storages/nebula_graph.py +89 -20
  69. camel/storages/graph_storages/neo4j_graph.py +138 -0
  70. camel/synthetic_datagen/source2synth/data_processor.py +373 -0
  71. camel/synthetic_datagen/source2synth/models.py +68 -0
  72. camel/synthetic_datagen/source2synth/user_data_processor_config.py +73 -0
  73. camel/toolkits/__init__.py +4 -0
  74. camel/toolkits/arxiv_toolkit.py +20 -3
  75. camel/toolkits/dappier_toolkit.py +196 -0
  76. camel/toolkits/function_tool.py +61 -61
  77. camel/toolkits/google_scholar_toolkit.py +9 -0
  78. camel/toolkits/meshy_toolkit.py +5 -1
  79. camel/toolkits/notion_toolkit.py +1 -1
  80. camel/toolkits/openbb_toolkit.py +869 -0
  81. camel/toolkits/search_toolkit.py +91 -5
  82. camel/toolkits/stripe_toolkit.py +5 -1
  83. camel/toolkits/twitter_toolkit.py +24 -16
  84. camel/types/__init__.py +4 -2
  85. camel/types/enums.py +34 -1
  86. camel/types/openai_types.py +6 -4
  87. camel/types/unified_model_type.py +5 -0
  88. camel/utils/__init__.py +2 -0
  89. camel/utils/commons.py +104 -19
  90. camel/utils/token_counting.py +3 -3
  91. {camel_ai-0.2.15a0.dist-info → camel_ai-0.2.17.dist-info}/METADATA +160 -177
  92. {camel_ai-0.2.15a0.dist-info → camel_ai-0.2.17.dist-info}/RECORD +94 -69
  93. {camel_ai-0.2.15a0.dist-info → camel_ai-0.2.17.dist-info}/WHEEL +1 -1
  94. camel/bots/discord_app.py +0 -138
  95. {camel_ai-0.2.15a0.dist-info → camel_ai-0.2.17.dist-info}/LICENSE +0 -0

camel/synthetic_datagen/source2synth/data_processor.py (new file)
@@ -0,0 +1,373 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+ import random
+ from typing import Any, Dict, List, Optional, Sequence
+
+ import numpy as np
+ from tqdm import tqdm
+
+ from camel.agents.multi_hop_generator_agent import MultiHopGeneratorAgent
+ from camel.logger import get_logger
+ from camel.synthetic_datagen.source2synth.user_data_processor_config import (
+     ProcessorConfig,
+ )
+
+ logger = get_logger(__name__)
+
+
+ class UserDataProcessor:
+     r"""User Data Processor."""
+
+     def __init__(self, config: Optional[ProcessorConfig] = None):
+         self.config = config or ProcessorConfig()
+         random.seed(self.config.seed)
+         np.random.seed(self.config.seed)
+         self.multi_hop_agent = (
+             MultiHopGeneratorAgent() if self.config.use_ai_model else None
+         )
+
+     def process_text(
+         self, text: str, source: str = "user_input"
+     ) -> List[Dict[str, Any]]:
+         r"""Process a single text."""
+         # Convert text to standard format
+         raw_data = [
+             {
+                 'text': text,
+                 'source': source,
+             }
+         ]
+
+         # Construct examples
+         constructor = ExampleConstructor(self.config, self.multi_hop_agent)
+         examples = constructor.construct_examples(raw_data)
+
+         # Manage data
+         curator = DataCurator(self.config)
+         final_dataset = curator.curate_dataset(examples)
+
+         return final_dataset
+
+     def process_batch(
+         self, texts: List[str], sources: Optional[List[str]] = None
+     ) -> List[Dict[str, Any]]:
+         r"""Process multiple texts in batch."""
+         if sources is None:
+             sources = ["user_input"] * len(texts)
+         elif len(sources) != len(texts):
+             raise ValueError("Length of sources must match length of texts")
+
+         raw_data = [
+             {
+                 'text': text,
+                 'source': source,
+             }
+             for text, source in zip(texts, sources)
+         ]
+
+         # Construct examples
+         constructor = ExampleConstructor(self.config, self.multi_hop_agent)
+         examples = constructor.construct_examples(raw_data)
+
+         # Manage data
+         curator = DataCurator(self.config)
+         final_dataset = curator.curate_dataset(examples)
+
+         return final_dataset
+
+
+ class ExampleConstructor:
+     r"""Example Constructor."""
+
+     def __init__(
+         self,
+         config: ProcessorConfig,
+         multi_hop_agent: Optional[MultiHopGeneratorAgent] = None,
+     ):
+         self.config = config
+         self.multi_hop_agent = multi_hop_agent
+
+     def construct_examples(
+         self, raw_data: List[Dict[str, Any]]
+     ) -> List[Dict[str, Any]]:
+         r"""Construct training examples."""
+         logger.info("Starting to construct training examples...")
+         examples = []
+
+         for data in tqdm(raw_data, desc="Constructing examples"):
+             # 1. Text preprocessing
+             processed_text = self._preprocess_text(data.get('text', ''))
+             if not processed_text:
+                 continue
+
+             # 2. Generate key information pairs
+             info_pairs = self._extract_info_pairs(processed_text)
+
+             # 3. Construct question-answer pairs
+             qa_pairs = self._generate_qa_pairs(info_pairs)
+
+             # 4. Add metadata
+             example = {
+                 'text': processed_text,
+                 'qa_pairs': qa_pairs,
+                 'metadata': {
+                     'source': data.get('source', 'unknown'),
+                     'timestamp': data.get('timestamp', ''),
+                     'complexity': self._calculate_complexity(qa_pairs),
+                 },
+             }
+
+             examples.append(example)
+
+         logger.info(f"Successfully constructed {len(examples)} examples")
+         return examples
+
+     def _preprocess_text(self, text: str) -> str:
+         r"""Text preprocessing."""
+         if not isinstance(text, str):
+             return ''
+
+         # 1. Basic cleaning
+         text = text.strip()
+
+         # 2. Length check
+         if (
+             len(text) < self.config.min_length
+             or len(text) > self.config.max_length
+         ):
+             return ''
+
+         # 3. Quality check
+         if not self._check_text_quality(text):
+             return ''
+
+         return text
+
+     def _check_text_quality(self, text: str) -> bool:
+         r"""Check text quality."""
+         # 1. Basic quality check
+         if text.count('.') < 2:  # Must have at least 2 sentences
+             return False
+
+         # 2. Special character ratio check
+         special_char_ratio = len(
+             [c for c in text if not c.isalnum() and not c.isspace()]
+         ) / len(text)
+         if special_char_ratio > 0.3:  # No more than 30% special characters
+             return False
+
+         return True
+
+     def _extract_info_pairs(self, text: str) -> List[Dict[str, Sequence[str]]]:
+         r"""Extract information pairs and relationships."""
+         # Split into sentences
+         sentences = [s.strip() for s in text.split('.') if s.strip()]
+         info_pairs = []
+
+         # Extract combinations of multiple related sentences
+         for i in range(len(sentences) - 2):
+             if len(sentences[i]) > 10 and len(sentences[i + 1]) > 10:
+                 info_pairs.append(
+                     {
+                         'premise': sentences[i],
+                         'intermediate': sentences[i + 1],
+                         'conclusion': sentences[i + 2]
+                         if i + 2 < len(sentences)
+                         else '',
+                         'related_contexts': [
+                             s
+                             for j, s in enumerate(sentences)
+                             if j != i and j != i + 1 and len(s) > 10
+                         ][:2],
+                         # Limit to 2 additional related contexts
+                     }
+                 )
+
+         return info_pairs
+
+     def _generate_qa_pairs(
+         self, info_pairs: List[Dict[str, Sequence[str]]]
+     ) -> List[Dict[str, str]]:
+         r"""Generate multi-hop question-answer pairs."""
+         qa_pairs = []
+
+         for pair in info_pairs:
+             # 1. Generate multi-hop question-answer pair using AI
+             if self.multi_hop_agent:
+                 # Construct full context
+                 context = (
+                     f"{pair['premise']}. {pair['intermediate']}."
+                     f" {pair['conclusion']}"
+                 )
+                 response = self.multi_hop_agent.generate_multi_hop_qa(context)
+                 if response:
+                     qa_pairs.append(response.value.dict())
+                     continue
+
+         return qa_pairs
+
+     def _calculate_complexity(self, qa_pairs: List[Dict[str, Any]]) -> float:
+         r"""Calculate complexity of QA pairs."""
+         if not qa_pairs:
+             return 0.0
+
+         # Calculate complexity based on multiple factors
+         complexities = []
+         for qa in qa_pairs:
+             # 1. Number of reasoning steps
+             reasoning_steps_count = len(qa.get('reasoning_steps', []))
+
+             # 2. Number of supporting facts
+             supporting_facts_count = len(qa.get('supporting_facts', []))
+
+             # 3. Question length
+             question_length = len(qa['question'].split())
+
+             # 4. Answer length
+             answer_length = len(qa['answer'].split())
+
+             # Calculate complexity of a single QA pair
+             qa_complexity = (
+                 min(reasoning_steps_count / 3, 1.0)
+                 * 0.4  # Weight for reasoning steps
+                 + min(supporting_facts_count / 3, 1.0)
+                 * 0.3  # Weight for supporting facts
+                 + min(question_length / 20, 1.0)
+                 * 0.15  # Weight for question length
+                 + min(answer_length / 50, 1.0) * 0.15
+                 # Weight for answer length
+             )
+
+             complexities.append(qa_complexity)
+
+         return sum(complexities) / len(complexities)
+
+
+ class DataCurator:
+     r"""Data Manager."""
+
+     def __init__(self, config: ProcessorConfig):
+         self.config = config
+
+     def curate_dataset(
+         self, examples: List[Dict[str, Any]]
+     ) -> List[Dict[str, Any]]:
+         r"""Dataset management."""
+         logger.info("Starting dataset management...")
+
+         # 1. Quality filtering
+         quality_filtered = self._quality_filter(examples)
+         logger.info(
+             f"Remaining examples after quality filtering:"
+             f" {len(quality_filtered)}"
+         )
+
+         # 2. Complexity filtering
+         complexity_filtered = self._complexity_filter(quality_filtered)
+         logger.info(
+             f"Remaining examples after complexity filtering:"
+             f" {len(complexity_filtered)}"
+         )
+
+         # 3. Deduplication
+         deduplicated = self._remove_duplicates(complexity_filtered)
+         logger.info(
+             f"Remaining examples after deduplication: {len(deduplicated)}"
+         )
+
+         # 4. Sample to target size
+         final_dataset = self._sample_dataset(deduplicated)
+         logger.info(f"Final dataset size: {len(final_dataset)}")
+
+         return final_dataset
+
+     def _quality_filter(
+         self, examples: List[Dict[str, Any]]
+     ) -> List[Dict[str, Any]]:
+         r"""Quality filtering."""
+         filtered = []
+
+         for example in examples:
+             # 1. Check QA pair quality
+             qa_quality = self._check_qa_quality(example.get('qa_pairs', []))
+
+             # 2. Check text quality
+             text_quality = (
+                 len(example.get('text', '').split()) >= 20
+             )  # At least 20 words
+
+             if qa_quality and text_quality:
+                 filtered.append(example)
+
+         return filtered
+
+     def _check_qa_quality(self, qa_pairs: List[Dict[str, str]]) -> bool:
+         r"""Check quality of QA pairs."""
+         if not qa_pairs:
+             return False
+
+         for qa in qa_pairs:
+             # 1. Length check
+             if (
+                 len(qa.get('question', '')) < 10
+                 or len(qa.get('answer', '')) < 5
+             ):
+                 return False
+
+             # 2. QA pair duplication check
+             if qa.get('question', '') == qa.get('answer', ''):
+                 return False
+
+         return True
+
+     def _complexity_filter(
+         self, examples: List[Dict[str, Any]]
+     ) -> List[Dict[str, Any]]:
+         r"""Complexity filtering."""
+         return [
+             example
+             for example in examples
+             if example.get('metadata', {}).get('complexity', 0)
+             >= self.config.complexity_threshold
+         ]
+
+     def _remove_duplicates(
+         self, examples: List[Dict[str, Any]]
+     ) -> List[Dict[str, Any]]:
+         r"""Remove duplicates."""
+         seen = set()
+         unique_examples = []
+
+         for example in examples:
+             # Use text and QA pair combination as unique identifier
+             text = example.get('text', '')
+             qa_str = str(example.get('qa_pairs', []))
+
+             identifier = hash(text + qa_str)
+
+             if identifier not in seen:
+                 seen.add(identifier)
+                 unique_examples.append(example)
+
+         return unique_examples
+
+     def _sample_dataset(
+         self, examples: List[Dict[str, Any]]
+     ) -> List[Dict[str, Any]]:
+         r"""Sample to target dataset size."""
+         if len(examples) <= self.config.dataset_size:
+             return examples
+
+         return random.sample(examples, self.config.dataset_size)
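
The new module wires a three-stage pipeline: UserDataProcessor seeds the RNGs and hands off to ExampleConstructor (preprocess, extract info pairs, generate QA) and then DataCurator (filter, deduplicate, sample). A minimal usage sketch, with an invented sample text; note two behaviors visible in the code above: with use_ai_model=False no agent is attached, so _generate_qa_pairs yields nothing and the curated dataset comes back empty, and ProcessorConfig's hop_generating_agent default factory still constructs a MultiHopGeneratorAgent, which may require model credentials.

from camel.synthetic_datagen.source2synth.data_processor import (
    UserDataProcessor,
)
from camel.synthetic_datagen.source2synth.user_data_processor_config import (
    ProcessorConfig,
)

# Fixed seed for reproducibility; thresholds loosened so a short sample
# text survives the length and complexity filters.
config = ProcessorConfig(
    seed=42,
    min_length=20,
    complexity_threshold=0.0,
    use_ai_model=False,  # processor gets no MultiHopGeneratorAgent
)
processor = UserDataProcessor(config)

dataset = processor.process_batch(
    texts=[
        "CAMEL is a multi-agent framework. It supports synthetic data "
        "generation. Source2Synth builds multi-hop QA pairs from raw text.",
    ],
    sources=["docs"],
)
# With use_ai_model=False no QA pairs are generated, so the quality
# filter drops every example and this prints 0.
print(len(dataset))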

camel/synthetic_datagen/source2synth/models.py (new file)
@@ -0,0 +1,68 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ from typing import Any, ClassVar, Dict, List, Optional
+
+ from pydantic import BaseModel, Field
+
+
+ class ReasoningStep(BaseModel):
+     step: str = Field(
+         ..., description="A single step in the reasoning process."
+     )
+
+
+ class MultiHopQA(BaseModel):
+     question: str = Field(
+         ..., description="The question that requires multi-hop reasoning."
+     )
+     reasoning_steps: List[ReasoningStep] = Field(
+         ...,
+         description="The steps involved in reasoning to answer the question.",
+     )
+     answer: str = Field(
+         ..., description="The answer to the multi-hop question."
+     )
+     supporting_facts: List[str] = Field(
+         ..., description="Facts that support the reasoning and answer."
+     )
+     type: str = Field(description="The type of question-answer pair.")
+
+     class Config:
+         json_schema_extra: ClassVar[Dict[str, Any]] = {
+             "example": {
+                 "question": "What is the capital of France?",
+                 "reasoning_steps": [
+                     {"step": "Identify the country France."},
+                     {"step": "Find the capital city of France."},
+                 ],
+                 "answer": "Paris",
+                 "supporting_facts": [
+                     "France is a country in Europe.",
+                     "Paris is the capital city of France.",
+                 ],
+                 "type": "multi_hop_qa",
+             }
+         }
+
+
+ class ContextPrompt(BaseModel):
+     main_context: str = Field(
+         ...,
+         description="The main context for generating"
+         " the question-answer pair.",
+     )
+     related_contexts: Optional[List[str]] = Field(
+         default=None,
+         description="Additional contexts related to the main context.",
+     )
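
For reference, a sketch that round-trips the schema's own json_schema_extra example through the model. MultiHopQA validates that reasoning_steps are ReasoningStep objects and that the required type field is supplied; model_dump() assumes pydantic v2, matching the ConfigDict usage elsewhere in this release.

from camel.synthetic_datagen.source2synth.models import (
    MultiHopQA,
    ReasoningStep,
)

# Values taken verbatim from the schema's embedded "example" above.
qa = MultiHopQA(
    question="What is the capital of France?",
    reasoning_steps=[
        ReasoningStep(step="Identify the country France."),
        ReasoningStep(step="Find the capital city of France."),
    ],
    answer="Paris",
    supporting_facts=[
        "France is a country in Europe.",
        "Paris is the capital city of France.",
    ],
    type="multi_hop_qa",  # required: declared without a default
)
print(qa.model_dump())  # qa.dict() on pydantic v1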

camel/synthetic_datagen/source2synth/user_data_processor_config.py (new file)
@@ -0,0 +1,73 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+ import random
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+ from camel.agents.multi_hop_generator_agent import MultiHopGeneratorAgent
+
+
+ class ProcessorConfig(BaseModel):
+     r"""Data processing configuration class"""
+
+     def __repr__(self):
+         return "MultiHopGeneratorAgent()"
+
+     model_config = ConfigDict(
+         validate_assignment=True,
+         frozen=False,
+         protected_namespaces=(),
+         arbitrary_types_allowed=True,
+     )
+
+     seed: int = Field(  # Generate a random seed for reproducibility
+         default_factory=lambda: random.randint(0, 1000),
+         description="Random seed for reproducibility",
+     )
+
+     min_length: int = Field(
+         default=50, description="Minimum text length", ge=0
+     )
+
+     max_length: int = Field(
+         default=512, description="Maximum text length", gt=0
+     )
+
+     quality_threshold: float = Field(
+         default=0.7,
+         description="Quality threshold for processing",
+         ge=0.0,
+         le=1.0,
+     )
+
+     complexity_threshold: float = Field(
+         default=0.5,
+         description="Complexity threshold for processing",
+         ge=0.0,
+         le=1.0,
+     )
+
+     dataset_size: int = Field(
+         default=1000, description="Target size of the dataset", gt=0
+     )
+
+     use_ai_model: bool = Field(
+         default=True, description="Whether to use AI model in processing"
+     )
+
+     hop_generating_agent: MultiHopGeneratorAgent = Field(
+         default_factory=lambda: MultiHopGeneratorAgent(),
+         description="Agent for generating multi-hop text",
+     )
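
Because model_config sets validate_assignment=True, the ge/gt/le bounds on the Field declarations are enforced on mutation as well as on construction. A small sketch of that behavior; note that constructing ProcessorConfig triggers the hop_generating_agent default factory, so a working MultiHopGeneratorAgent setup (model backend, credentials) may be needed for this to run.

from pydantic import ValidationError

from camel.synthetic_datagen.source2synth.user_data_processor_config import (
    ProcessorConfig,
)

# Construction validates the declared bounds once...
config = ProcessorConfig(seed=7, dataset_size=100)

# ...and validate_assignment=True re-validates on every mutation.
config.quality_threshold = 0.9  # accepted: within [0.0, 1.0]
try:
    config.complexity_threshold = 1.5  # rejected: violates le=1.0
except ValidationError as e:
    print(e.errors()[0]["type"])  # e.g. 'less_than_equal'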

camel/toolkits/__init__.py
@@ -28,6 +28,7 @@ from .ask_news_toolkit import AskNewsToolkit, AsyncAskNewsToolkit
  from .linkedin_toolkit import LinkedInToolkit
  from .reddit_toolkit import RedditToolkit
  from .meshy_toolkit import MeshyToolkit
+ from .openbb_toolkit import OpenBBToolkit
 
  from .base import BaseToolkit
  from .google_maps_toolkit import GoogleMapsToolkit
@@ -43,6 +44,7 @@ from .notion_toolkit import NotionToolkit
  from .human_toolkit import HumanToolkit
  from .stripe_toolkit import StripeToolkit
  from .video_toolkit import VideoDownloaderToolkit
+ from .dappier_toolkit import DappierToolkit
 
  __all__ = [
      'BaseToolkit',
@@ -73,4 +75,6 @@ __all__ = [
      'VideoDownloaderToolkit',
      'StripeToolkit',
      'MeshyToolkit',
+     'OpenBBToolkit',
+     'DappierToolkit',
  ]
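
The two new exports can be pulled straight from camel.toolkits. A sketch assuming both follow the BaseToolkit convention of exposing FunctionTool wrappers via get_tools(); OpenBB and Dappier credentials are presumably read from the environment, so construction may fail without them.

from camel.toolkits import DappierToolkit, OpenBBToolkit

# Assumed: both toolkits subclass BaseToolkit, so get_tools() returns the
# FunctionTool wrappers that a ChatAgent can consume directly.
for toolkit in (OpenBBToolkit(), DappierToolkit()):
    for tool in toolkit.get_tools():
        print(tool.get_function_name())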

camel/toolkits/arxiv_toolkit.py
@@ -14,10 +14,13 @@
 
  from typing import Dict, Generator, List, Optional
 
+ from camel.logger import get_logger
  from camel.toolkits.base import BaseToolkit
  from camel.toolkits.function_tool import FunctionTool
  from camel.utils import dependencies_required
 
+ logger = get_logger(__name__)
+
 
  class ArxivToolkit(BaseToolkit):
      r"""A toolkit for interacting with the arXiv API to search and download
@@ -98,10 +101,24 @@ class ArxivToolkit(BaseToolkit):
                  "authors": [author.name for author in paper.authors],
                  "entry_id": paper.entry_id,
                  "summary": paper.summary,
-                 # TODO: Use chunkr instead of arxiv_to_text for better
-                 # performance
-                 "paper_text": arxiv_to_text(paper.pdf_url),
+                 "pdf_url": paper.pdf_url,
              }
+
+             # Extract text from the paper
+             try:
+                 # TODO: Use chunkr instead of arxiv_to_text for better
+                 # performance and reliability
+                 text = arxiv_to_text(paper_info["pdf_url"])
+             except Exception as e:
+                 logger.error(
+                     "Failed to extract text content from the PDF at "
+                     "the specified URL. "
+                     f"URL: {paper_info.get('pdf_url', 'Unknown')} | Error: {e}"
+                 )
+                 text = ""
+
+             paper_info['paper_text'] = text
+
              papers_data.append(paper_info)
 
          return papers_data
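
The net effect of the second hunk: a failed PDF extraction now logs an error and degrades to an empty paper_text instead of raising, and every result carries a pdf_url. A sketch of how that surfaces to callers, assuming the enclosing method is the toolkit's search_papers (its name and signature are not shown in the hunk).

from camel.toolkits import ArxivToolkit

toolkit = ArxivToolkit()
# search_papers(query, max_results=...) is assumed; the hunk only shows
# the loop body that builds each paper_info entry.
papers = toolkit.search_papers("multi-agent systems", max_results=3)

for paper in papers:
    # After this change, 'paper_text' falls back to "" on extraction
    # failure while 'pdf_url' is always populated.
    if not paper["paper_text"]:
        print(f"Text extraction failed; PDF still at {paper['pdf_url']}")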