camel-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this release of camel-ai has been flagged as potentially problematic.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +18 -4
- camel/agents/multi_hop_generator_agent.py +85 -0
- camel/agents/programmed_agent_instruction.py +148 -0
- camel/benchmarks/__init__.py +2 -0
- camel/benchmarks/apibank.py +5 -0
- camel/benchmarks/apibench.py +8 -4
- camel/benchmarks/gaia.py +2 -2
- camel/benchmarks/ragbench.py +333 -0
- camel/bots/__init__.py +1 -1
- camel/bots/discord/__init__.py +26 -0
- camel/bots/discord/discord_app.py +384 -0
- camel/bots/discord/discord_installation.py +64 -0
- camel/bots/discord/discord_store.py +160 -0
- camel/configs/__init__.py +3 -0
- camel/configs/anthropic_config.py +17 -15
- camel/configs/internlm_config.py +60 -0
- camel/data_collector/base.py +5 -5
- camel/data_collector/sharegpt_collector.py +2 -2
- camel/datagen/self_instruct/self_instruct.py +1 -1
- camel/datagen/self_instruct/templates.py +12 -14
- camel/loaders/__init__.py +2 -0
- camel/loaders/panda_reader.py +337 -0
- camel/messages/__init__.py +10 -4
- camel/messages/conversion/conversation_models.py +5 -0
- camel/messages/func_message.py +30 -22
- camel/models/__init__.py +2 -0
- camel/models/anthropic_model.py +1 -22
- camel/models/cohere_model.py +8 -0
- camel/models/gemini_model.py +10 -1
- camel/models/internlm_model.py +143 -0
- camel/models/mistral_model.py +14 -7
- camel/models/model_factory.py +3 -0
- camel/models/reward/__init__.py +2 -0
- camel/models/reward/skywork_model.py +88 -0
- camel/synthetic_datagen/source2synth/data_processor.py +373 -0
- camel/synthetic_datagen/source2synth/models.py +68 -0
- camel/synthetic_datagen/source2synth/user_data_processor_config.py +73 -0
- camel/toolkits/google_scholar_toolkit.py +9 -0
- camel/types/__init__.py +4 -2
- camel/types/enums.py +34 -1
- camel/types/openai_types.py +6 -4
- camel/types/unified_model_type.py +5 -0
- camel/utils/token_counting.py +3 -3
- {camel_ai-0.2.16.dist-info → camel_ai-0.2.17.dist-info}/METADATA +158 -187
- {camel_ai-0.2.16.dist-info → camel_ai-0.2.17.dist-info}/RECORD +48 -35
- {camel_ai-0.2.16.dist-info → camel_ai-0.2.17.dist-info}/WHEEL +1 -1
- camel/bots/discord_app.py +0 -138
- {camel_ai-0.2.16.dist-info → camel_ai-0.2.17.dist-info}/LICENSE +0 -0
camel/synthetic_datagen/source2synth/data_processor.py ADDED

@@ -0,0 +1,373 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import random
+from typing import Any, Dict, List, Optional, Sequence
+
+import numpy as np
+from tqdm import tqdm
+
+from camel.agents.multi_hop_generator_agent import MultiHopGeneratorAgent
+from camel.logger import get_logger
+from camel.synthetic_datagen.source2synth.user_data_processor_config import (
+    ProcessorConfig,
+)
+
+logger = get_logger(__name__)
+
+
+class UserDataProcessor:
+    r"""User Data Processor."""
+
+    def __init__(self, config: Optional[ProcessorConfig] = None):
+        self.config = config or ProcessorConfig()
+        random.seed(self.config.seed)
+        np.random.seed(self.config.seed)
+        self.multi_hop_agent = (
+            MultiHopGeneratorAgent() if self.config.use_ai_model else None
+        )
+
+    def process_text(
+        self, text: str, source: str = "user_input"
+    ) -> List[Dict[str, Any]]:
+        r"""Process a single text."""
+        # Convert text to standard format
+        raw_data = [
+            {
+                'text': text,
+                'source': source,
+            }
+        ]
+
+        # Construct examples
+        constructor = ExampleConstructor(self.config, self.multi_hop_agent)
+        examples = constructor.construct_examples(raw_data)
+
+        # Manage data
+        curator = DataCurator(self.config)
+        final_dataset = curator.curate_dataset(examples)
+
+        return final_dataset
+
+    def process_batch(
+        self, texts: List[str], sources: Optional[List[str]] = None
+    ) -> List[Dict[str, Any]]:
+        r"""Process multiple texts in batch."""
+        if sources is None:
+            sources = ["user_input"] * len(texts)
+        elif len(sources) != len(texts):
+            raise ValueError("Length of sources must match length of texts")
+
+        raw_data = [
+            {
+                'text': text,
+                'source': source,
+            }
+            for text, source in zip(texts, sources)
+        ]
+
+        # Construct examples
+        constructor = ExampleConstructor(self.config, self.multi_hop_agent)
+        examples = constructor.construct_examples(raw_data)
+
+        # Manage data
+        curator = DataCurator(self.config)
+        final_dataset = curator.curate_dataset(examples)
+
+        return final_dataset
+
+
+class ExampleConstructor:
+    r"""Example Constructor."""
+
+    def __init__(
+        self,
+        config: ProcessorConfig,
+        multi_hop_agent: Optional[MultiHopGeneratorAgent] = None,
+    ):
+        self.config = config
+        self.multi_hop_agent = multi_hop_agent
+
+    def construct_examples(
+        self, raw_data: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        r"""Construct training examples."""
+        logger.info("Starting to construct training examples...")
+        examples = []
+
+        for data in tqdm(raw_data, desc="Constructing examples"):
+            # 1. Text preprocessing
+            processed_text = self._preprocess_text(data.get('text', ''))
+            if not processed_text:
+                continue
+
+            # 2. Generate key information pairs
+            info_pairs = self._extract_info_pairs(processed_text)
+
+            # 3. Construct question-answer pairs
+            qa_pairs = self._generate_qa_pairs(info_pairs)
+
+            # 4. Add metadata
+            example = {
+                'text': processed_text,
+                'qa_pairs': qa_pairs,
+                'metadata': {
+                    'source': data.get('source', 'unknown'),
+                    'timestamp': data.get('timestamp', ''),
+                    'complexity': self._calculate_complexity(qa_pairs),
+                },
+            }
+
+            examples.append(example)
+
+        logger.info(f"Successfully constructed {len(examples)} examples")
+        return examples
+
+    def _preprocess_text(self, text: str) -> str:
+        r"""Text preprocessing."""
+        if not isinstance(text, str):
+            return ''
+
+        # 1. Basic cleaning
+        text = text.strip()
+
+        # 2. Length check
+        if (
+            len(text) < self.config.min_length
+            or len(text) > self.config.max_length
+        ):
+            return ''
+
+        # 3. Quality check
+        if not self._check_text_quality(text):
+            return ''
+
+        return text
+
+    def _check_text_quality(self, text: str) -> bool:
+        r"""Check text quality."""
+        # 1. Basic quality check
+        if text.count('.') < 2:  # Must have at least 2 sentences
+            return False
+
+        # 2. Special character ratio check
+        special_char_ratio = len(
+            [c for c in text if not c.isalnum() and not c.isspace()]
+        ) / len(text)
+        if special_char_ratio > 0.3:  # No more than 30% special characters
+            return False
+
+        return True
+
+    def _extract_info_pairs(self, text: str) -> List[Dict[str, Sequence[str]]]:
+        r"""Extract information pairs and relationships."""
+        # Split into sentences
+        sentences = [s.strip() for s in text.split('.') if s.strip()]
+        info_pairs = []
+
+        # Extract combinations of multiple related sentences
+        for i in range(len(sentences) - 2):
+            if len(sentences[i]) > 10 and len(sentences[i + 1]) > 10:
+                info_pairs.append(
+                    {
+                        'premise': sentences[i],
+                        'intermediate': sentences[i + 1],
+                        'conclusion': sentences[i + 2]
+                        if i + 2 < len(sentences)
+                        else '',
+                        'related_contexts': [
+                            s
+                            for j, s in enumerate(sentences)
+                            if j != i and j != i + 1 and len(s) > 10
+                        ][:2],
+                        # Limit to 2 additional related contexts
+                    }
+                )
+
+        return info_pairs
+
+    def _generate_qa_pairs(
+        self, info_pairs: List[Dict[str, Sequence[str]]]
+    ) -> List[Dict[str, str]]:
+        r"""Generate multi-hop question-answer pairs."""
+        qa_pairs = []
+
+        for pair in info_pairs:
+            # 1. Generate multi-hop question-answer pair using AI
+            if self.multi_hop_agent:
+                # Construct full context
+                context = (
+                    f"{pair['premise']}. {pair['intermediate']}."
+                    f" {pair['conclusion']}"
+                )
+                response = self.multi_hop_agent.generate_multi_hop_qa(context)
+                if response:
+                    qa_pairs.append(response.value.dict())
+                    continue
+
+        return qa_pairs
+
+    def _calculate_complexity(self, qa_pairs: List[Dict[str, Any]]) -> float:
+        r"""Calculate complexity of QA pairs."""
+        if not qa_pairs:
+            return 0.0
+
+        # Calculate complexity based on multiple factors
+        complexities = []
+        for qa in qa_pairs:
+            # 1. Number of reasoning steps
+            reasoning_steps_count = len(qa.get('reasoning_steps', []))
+
+            # 2. Number of supporting facts
+            supporting_facts_count = len(qa.get('supporting_facts', []))
+
+            # 3. Question length
+            question_length = len(qa['question'].split())
+
+            # 4. Answer length
+            answer_length = len(qa['answer'].split())
+
+            # Calculate complexity of a single QA pair
+            qa_complexity = (
+                min(reasoning_steps_count / 3, 1.0)
+                * 0.4  # Weight for reasoning steps
+                + min(supporting_facts_count / 3, 1.0)
+                * 0.3  # Weight for supporting facts
+                + min(question_length / 20, 1.0)
+                * 0.15  # Weight for question length
+                + min(answer_length / 50, 1.0) * 0.15
+                # Weight for answer length
+            )
+
+            complexities.append(qa_complexity)
+
+        return sum(complexities) / len(complexities)
+
+
+class DataCurator:
+    r"""Data Manager."""
+
+    def __init__(self, config: ProcessorConfig):
+        self.config = config
+
+    def curate_dataset(
+        self, examples: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        r"""Dataset management."""
+        logger.info("Starting dataset management...")
+
+        # 1. Quality filtering
+        quality_filtered = self._quality_filter(examples)
+        logger.info(
+            f"Remaining examples after quality filtering:"
+            f" {len(quality_filtered)}"
+        )
+
+        # 2. Complexity filtering
+        complexity_filtered = self._complexity_filter(quality_filtered)
+        logger.info(
+            f"Remaining examples after complexity filtering:"
+            f" {len(complexity_filtered)}"
+        )
+
+        # 3. Deduplication
+        deduplicated = self._remove_duplicates(complexity_filtered)
+        logger.info(
+            f"Remaining examples after deduplication: {len(deduplicated)}"
+        )
+
+        # 4. Sample to target size
+        final_dataset = self._sample_dataset(deduplicated)
+        logger.info(f"Final dataset size: {len(final_dataset)}")
+
+        return final_dataset
+
+    def _quality_filter(
+        self, examples: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        r"""Quality filtering."""
+        filtered = []
+
+        for example in examples:
+            # 1. Check QA pair quality
+            qa_quality = self._check_qa_quality(example.get('qa_pairs', []))
+
+            # 2. Check text quality
+            text_quality = (
+                len(example.get('text', '').split()) >= 20
+            )  # At least 20 words
+
+            if qa_quality and text_quality:
+                filtered.append(example)
+
+        return filtered
+
+    def _check_qa_quality(self, qa_pairs: List[Dict[str, str]]) -> bool:
+        r"""Check quality of QA pairs."""
+        if not qa_pairs:
+            return False
+
+        for qa in qa_pairs:
+            # 1. Length check
+            if (
+                len(qa.get('question', '')) < 10
+                or len(qa.get('answer', '')) < 5
+            ):
+                return False
+
+            # 2. QA pair duplication check
+            if qa.get('question', '') == qa.get('answer', ''):
+                return False
+
+        return True
+
+    def _complexity_filter(
+        self, examples: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        r"""Complexity filtering."""
+        return [
+            example
+            for example in examples
+            if example.get('metadata', {}).get('complexity', 0)
+            >= self.config.complexity_threshold
+        ]
+
+    def _remove_duplicates(
+        self, examples: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        r"""Remove duplicates."""
+        seen = set()
+        unique_examples = []
+
+        for example in examples:
+            # Use text and QA pair combination as unique identifier
+            text = example.get('text', '')
+            qa_str = str(example.get('qa_pairs', []))
+
+            identifier = hash(text + qa_str)
+
+            if identifier not in seen:
+                seen.add(identifier)
+                unique_examples.append(example)
+
+        return unique_examples
+
+    def _sample_dataset(
+        self, examples: List[Dict[str, Any]]
+    ) -> List[Dict[str, Any]]:
+        r"""Sample to target dataset size."""
+        if len(examples) <= self.config.dataset_size:
+            return examples
+
+        return random.sample(examples, self.config.dataset_size)
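The three classes above form a pipeline: UserDataProcessor seeds the RNGs and owns the optional MultiHopGeneratorAgent, ExampleConstructor turns raw text into QA examples, and DataCurator filters, deduplicates, and samples. A minimal usage sketch (not from the package docs; it assumes a model backend such as an OpenAI API key is configured, since MultiHopGeneratorAgent is instantiated when use_ai_model stays at its default of True):

# Illustrative only. The input text must pass the pipeline's own checks
# (>= 3 sentences for info pairs, >= 20 words, 50-512 characters by
# default) or it is silently filtered out.
from camel.synthetic_datagen.source2synth.data_processor import (
    UserDataProcessor,
)
from camel.synthetic_datagen.source2synth.user_data_processor_config import (
    ProcessorConfig,
)

config = ProcessorConfig(seed=42, dataset_size=10)
processor = UserDataProcessor(config)

examples = processor.process_batch(
    texts=[
        "CAMEL is an open-source multi-agent framework built on large"
        " language models. It supports native tool calling for several"
        " model platforms. It also ships generators for synthetic"
        " multi-hop training data.",
    ],
    sources=["docs"],
)
for example in examples:
    print(example['metadata']['complexity'], len(example['qa_pairs']))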
camel/synthetic_datagen/source2synth/models.py ADDED

@@ -0,0 +1,68 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+from typing import Any, ClassVar, Dict, List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class ReasoningStep(BaseModel):
+    step: str = Field(
+        ..., description="A single step in the reasoning process."
+    )
+
+
+class MultiHopQA(BaseModel):
+    question: str = Field(
+        ..., description="The question that requires multi-hop reasoning."
+    )
+    reasoning_steps: List[ReasoningStep] = Field(
+        ...,
+        description="The steps involved in reasoning to answer the question.",
+    )
+    answer: str = Field(
+        ..., description="The answer to the multi-hop question."
+    )
+    supporting_facts: List[str] = Field(
+        ..., description="Facts that support the reasoning and answer."
+    )
+    type: str = Field(description="The type of question-answer pair.")
+
+    class Config:
+        json_schema_extra: ClassVar[Dict[str, Any]] = {
+            "example": {
+                "question": "What is the capital of France?",
+                "reasoning_steps": [
+                    {"step": "Identify the country France."},
+                    {"step": "Find the capital city of France."},
+                ],
+                "answer": "Paris",
+                "supporting_facts": [
+                    "France is a country in Europe.",
+                    "Paris is the capital city of France.",
+                ],
+                "type": "multi_hop_qa",
+            }
+        }
+
+
+class ContextPrompt(BaseModel):
+    main_context: str = Field(
+        ...,
+        description="The main context for generating"
+        " the question-answer pair.",
+    )
+    related_contexts: Optional[List[str]] = Field(
+        default=None,
+        description="Additional contexts related to the main context.",
+    )
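MultiHopQA is the schema the generator agent is asked to fill, with ReasoningStep wrapping each hop; data_processor.py stores each generated pair via .dict(). A quick construction sketch (illustrative, mirroring the json_schema_extra example):

from camel.synthetic_datagen.source2synth.models import (
    MultiHopQA,
    ReasoningStep,
)

qa = MultiHopQA(
    question="What is the capital of France?",
    reasoning_steps=[
        ReasoningStep(step="Identify the country France."),
        ReasoningStep(step="Find the capital city of France."),
    ],
    answer="Paris",
    supporting_facts=[
        "France is a country in Europe.",
        "Paris is the capital city of France.",
    ],
    type="multi_hop_qa",
)

# data_processor.py calls response.value.dict(); note .dict() is the
# deprecated Pydantic v1-style alias for .model_dump().
record = qa.dict()
assert record["reasoning_steps"][0]["step"] == "Identify the country France."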
camel/synthetic_datagen/source2synth/user_data_processor_config.py ADDED

@@ -0,0 +1,73 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import random
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from camel.agents.multi_hop_generator_agent import MultiHopGeneratorAgent
+
+
+class ProcessorConfig(BaseModel):
+    r"""Data processing configuration class"""
+
+    def __repr__(self):
+        return "MultiHopGeneratorAgent()"
+
+    model_config = ConfigDict(
+        validate_assignment=True,
+        frozen=False,
+        protected_namespaces=(),
+        arbitrary_types_allowed=True,
+    )
+
+    seed: int = Field(  # Generate a random seed for reproducibility
+        default_factory=lambda: random.randint(0, 1000),
+        description="Random seed for reproducibility",
+    )
+
+    min_length: int = Field(
+        default=50, description="Minimum text length", ge=0
+    )
+
+    max_length: int = Field(
+        default=512, description="Maximum text length", gt=0
+    )
+
+    quality_threshold: float = Field(
+        default=0.7,
+        description="Quality threshold for processing",
+        ge=0.0,
+        le=1.0,
+    )
+
+    complexity_threshold: float = Field(
+        default=0.5,
+        description="Complexity threshold for processing",
+        ge=0.0,
+        le=1.0,
+    )
+
+    dataset_size: int = Field(
+        default=1000, description="Target size of the dataset", gt=0
+    )
+
+    use_ai_model: bool = Field(
+        default=True, description="Whether to use AI model in processing"
+    )
+
+    hop_generating_agent: MultiHopGeneratorAgent = Field(
+        default_factory=lambda: MultiHopGeneratorAgent(),
+        description="Agent for generating multi-hop text",
+    )
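Two details of ProcessorConfig are worth flagging: seed defaults to a fresh random value, so runs are only reproducible if you pin it, and hop_generating_agent eagerly constructs a MultiHopGeneratorAgent via default_factory even when use_ai_model is False (the hard-coded __repr__ also hides the rest of the config). A hedged sketch:

# Sketch: constructing the config triggers the hop_generating_agent
# default_factory, which assumes a usable model backend / API key.
from camel.synthetic_datagen.source2synth.user_data_processor_config import (
    ProcessorConfig,
)

config = ProcessorConfig(
    seed=42,                   # pin the otherwise-random default
    min_length=50,
    max_length=512,
    complexity_threshold=0.5,  # examples scoring below this are dropped
    dataset_size=100,
)

# validate_assignment=True re-checks field constraints on mutation:
config.complexity_threshold = 0.8   # OK: within [0.0, 1.0]
# config.complexity_threshold = 1.5  # would raise a ValidationError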
camel/toolkits/google_scholar_toolkit.py CHANGED

@@ -36,6 +36,7 @@ class GoogleScholarToolkit(BaseToolkit):
         self,
         author_identifier: str,
         is_author_name: bool = False,
+        use_free_proxies: bool = False,
         proxy_http: Optional[str] = None,
         proxy_https: Optional[str] = None,
     ) -> None:

@@ -46,6 +47,8 @@ class GoogleScholarToolkit(BaseToolkit):
                 of the author to search for.
             is_author_name (bool): Flag to indicate if the identifier is a
                 name. (default: :obj:`False`)
+            use_free_proxies (bool): Whether to use Free Proxies.
+                (default: :obj:`False`)
             proxy_http ( Optional[str]): Proxy http address pass to pg.
                 SingleProxy. (default: :obj:`None`)
             proxy_https ( Optional[str]): Proxy https address pass to pg.

@@ -53,6 +56,12 @@ class GoogleScholarToolkit(BaseToolkit):
         """
         from scholarly import ProxyGenerator, scholarly

+        # Set Free Proxies is needed
+        if use_free_proxies:
+            pg = ProxyGenerator()
+            pg.FreeProxies()
+            scholarly.use_proxy(pg)
+
         # Set Proxy is HTTP or HTTPS provided
         if proxy_http or proxy_https:
             pg = ProxyGenerator()
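The new flag wires scholarly's free rotating-proxy pool in ahead of any explicit HTTP/HTTPS proxy settings. A construction sketch (illustrative; the author name is a placeholder, and scholarly's free-proxy pool is best-effort and can be slow or flaky):

from camel.toolkits.google_scholar_toolkit import GoogleScholarToolkit

# Route scholarly traffic through ProxyGenerator.FreeProxies() to
# reduce the chance of Google Scholar rate-limiting direct requests.
toolkit = GoogleScholarToolkit(
    author_identifier="Jane Doe",   # placeholder author name
    is_author_name=True,
    use_free_proxies=True,
)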
camel/types/__init__.py CHANGED

@@ -33,10 +33,11 @@ from .openai_types import (
     ChatCompletion,
     ChatCompletionAssistantMessageParam,
     ChatCompletionChunk,
-    ChatCompletionFunctionMessageParam,
     ChatCompletionMessage,
     ChatCompletionMessageParam,
+    ChatCompletionMessageToolCall,
     ChatCompletionSystemMessageParam,
+    ChatCompletionToolMessageParam,
     ChatCompletionUserMessageParam,
     Choice,
     CompletionUsage,

@@ -62,7 +63,8 @@ __all__ = [
     'ChatCompletionSystemMessageParam',
     'ChatCompletionUserMessageParam',
     'ChatCompletionAssistantMessageParam',
-    'ChatCompletionFunctionMessageParam',
+    'ChatCompletionToolMessageParam',
+    'ChatCompletionMessageToolCall',
     'CompletionUsage',
     'OpenAIImageType',
     'OpenAIVisionDetailType',
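For downstream code, the practical effect is that camel.types no longer exports the deprecated function-message parameter type; imports move to the tool-message equivalents:

# 0.2.16:
# from camel.types import ChatCompletionFunctionMessageParam

# 0.2.17:
from camel.types import (
    ChatCompletionMessageToolCall,
    ChatCompletionToolMessageParam,
)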
camel/types/enums.py CHANGED

@@ -142,6 +142,12 @@ class ModelType(UnifiedModelType, Enum):
     # DeepSeek models
     DEEPSEEK_CHAT = "deepseek-chat"

+    # InternLM models
+    INTERNLM3_LATEST = "internlm3-latest"
+    INTERNLM3_8B_INSTRUCT = "internlm3-8b-instruct"
+    INTERNLM2_5_LATEST = "internlm2.5-latest"
+    INTERNLM2_PRO_CHAT = "internlm2-pro-chat"
+
     def __str__(self):
         return self.value

@@ -161,7 +167,15 @@ class ModelType(UnifiedModelType, Enum):
     @property
     def support_native_tool_calling(self) -> bool:
         return any(
-            [
+            [
+                self.is_openai,
+                self.is_gemini,
+                self.is_mistral,
+                self.is_qwen,
+                self.is_deepseek,
+                self.is_cohere,
+                self.is_internlm,
+            ]
         )

@@ -353,6 +367,15 @@ class ModelType(UnifiedModelType, Enum):
             ModelType.DEEPSEEK_CHAT,
         }

+    @property
+    def is_internlm(self) -> bool:
+        return self in {
+            ModelType.INTERNLM3_LATEST,
+            ModelType.INTERNLM3_8B_INSTRUCT,
+            ModelType.INTERNLM2_5_LATEST,
+            ModelType.INTERNLM2_PRO_CHAT,
+        }
+
     @property
     def token_limit(self) -> int:
         r"""Returns the maximum token limit for a given model.

@@ -411,6 +434,10 @@ class ModelType(UnifiedModelType, Enum):
             ModelType.NVIDIA_MISTRAL_LARGE,
             ModelType.NVIDIA_MIXTRAL_8X7B,
             ModelType.QWEN_QWQ_32B,
+            ModelType.INTERNLM3_8B_INSTRUCT,
+            ModelType.INTERNLM3_LATEST,
+            ModelType.INTERNLM2_5_LATEST,
+            ModelType.INTERNLM2_PRO_CHAT,
         }:
             return 32_768
         elif self in {

@@ -634,6 +661,7 @@ class ModelPlatformType(Enum):
     NVIDIA = "nvidia"
     DEEPSEEK = "deepseek"
     SGLANG = "sglang"
+    INTERNLM = "internlm"

     @property
     def is_openai(self) -> bool:

@@ -736,6 +764,11 @@ class ModelPlatformType(Enum):
         r"""Returns whether this platform is DeepSeek."""
         return self is ModelPlatformType.DEEPSEEK

+    @property
+    def is_internlm(self) -> bool:
+        r"""Returns whether this platform is InternLM."""
+        return self is ModelPlatformType.INTERNLM
+

 class AudioModelType(Enum):
     TTS_1 = "tts-1"
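The net effect of the enum changes: four InternLM model types that report native tool-calling support and fall into the 32k token bucket, plus an INTERNLM platform with its own predicate. A quick sketch:

from camel.types import ModelPlatformType, ModelType

model = ModelType.INTERNLM3_LATEST
assert model.is_internlm
assert model.support_native_tool_calling
assert model.token_limit == 32_768   # shares the 32k bucket above

platform = ModelPlatformType.INTERNLM
assert platform.is_internlm
print(model)   # -> "internlm3-latest", via the enum's __str__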
camel/types/openai_types.py CHANGED

@@ -16,10 +16,10 @@ from openai.types.chat.chat_completion import ChatCompletion, Choice
 from openai.types.chat.chat_completion_assistant_message_param import (
     ChatCompletionAssistantMessageParam,
 )
-from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
-from openai.types.chat.chat_completion_function_message_param import (
-    ChatCompletionFunctionMessageParam,
+from openai.types.chat.chat_completion_tool_message_param import (
+    ChatCompletionToolMessageParam,
 )
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 from openai.types.chat.chat_completion_message import ChatCompletionMessage
 from openai.types.chat.chat_completion_message_param import (
     ChatCompletionMessageParam,

@@ -33,6 +33,7 @@ from openai.types.chat.chat_completion_user_message_param import (
 from openai.types.completion_usage import CompletionUsage
 from openai.types.chat import ParsedChatCompletion
 from openai._types import NOT_GIVEN, NotGiven
+from openai.types.chat import ChatCompletionMessageToolCall

 Choice = Choice
 ChatCompletion = ChatCompletion

@@ -42,7 +43,8 @@ ChatCompletionMessageParam = ChatCompletionMessageParam
 ChatCompletionSystemMessageParam = ChatCompletionSystemMessageParam
 ChatCompletionUserMessageParam = ChatCompletionUserMessageParam
 ChatCompletionAssistantMessageParam = ChatCompletionAssistantMessageParam
-ChatCompletionFunctionMessageParam = ChatCompletionFunctionMessageParam
+ChatCompletionToolMessageParam = ChatCompletionToolMessageParam
+ChatCompletionMessageToolCall = ChatCompletionMessageToolCall
 CompletionUsage = CompletionUsage
 NOT_GIVEN = NOT_GIVEN
 NotGiven = NotGiven
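Both re-exports come straight from the openai SDK: the tool-message param is a TypedDict and the tool call is a Pydantic model, so migrating from function messages is mostly a matter of carrying the tool_call_id through. A hedged sketch of the new shapes (the ids and function name are hypothetical placeholders):

from camel.types import (
    ChatCompletionMessageToolCall,
    ChatCompletionToolMessageParam,
)

# Result message for a completed tool call (TypedDict, so a plain
# dict literal with the right keys type-checks):
tool_result: ChatCompletionToolMessageParam = {
    "role": "tool",
    "tool_call_id": "call_abc123",   # hypothetical id from the model
    "content": '{"temperature": 21}',
}

# The assistant-side record of the call itself (a Pydantic model):
tool_call = ChatCompletionMessageToolCall(
    id="call_abc123",
    type="function",
    function={"name": "get_weather", "arguments": '{"city": "Paris"}'},
)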
camel/types/unified_model_type.py CHANGED

@@ -113,6 +113,11 @@ class UnifiedModelType(str):
         r"""Returns whether the model is a Qwen model."""
         return True

+    @property
+    def is_internlm(self) -> bool:
+        r"""Returns whether the model is a InternLM model."""
+        return True
+
     @property
     def support_native_structured_output(self) -> bool:
         r"""Returns whether the model supports native structured output."""