camel-ai 0.2.14__py3-none-any.whl → 0.2.15__py3-none-any.whl

This diff represents the content of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of camel-ai might be problematic.

Files changed (68)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +362 -237
  3. camel/benchmarks/__init__.py +11 -1
  4. camel/benchmarks/apibank.py +560 -0
  5. camel/benchmarks/apibench.py +496 -0
  6. camel/benchmarks/gaia.py +2 -2
  7. camel/benchmarks/nexus.py +518 -0
  8. camel/datagen/__init__.py +21 -0
  9. camel/datagen/cotdatagen.py +448 -0
  10. camel/datagen/self_instruct/__init__.py +36 -0
  11. camel/datagen/self_instruct/filter/__init__.py +34 -0
  12. camel/datagen/self_instruct/filter/filter_function.py +208 -0
  13. camel/datagen/self_instruct/filter/filter_registry.py +56 -0
  14. camel/datagen/self_instruct/filter/instruction_filter.py +76 -0
  15. camel/datagen/self_instruct/self_instruct.py +393 -0
  16. camel/datagen/self_instruct/templates.py +384 -0
  17. camel/datahubs/huggingface.py +12 -2
  18. camel/datahubs/models.py +4 -2
  19. camel/embeddings/mistral_embedding.py +5 -1
  20. camel/embeddings/openai_compatible_embedding.py +6 -1
  21. camel/embeddings/openai_embedding.py +5 -1
  22. camel/interpreters/e2b_interpreter.py +5 -1
  23. camel/loaders/apify_reader.py +5 -1
  24. camel/loaders/chunkr_reader.py +5 -1
  25. camel/loaders/firecrawl_reader.py +0 -30
  26. camel/logger.py +11 -5
  27. camel/messages/conversion/sharegpt/hermes/hermes_function_formatter.py +4 -1
  28. camel/models/anthropic_model.py +5 -1
  29. camel/models/azure_openai_model.py +1 -2
  30. camel/models/cohere_model.py +5 -1
  31. camel/models/deepseek_model.py +5 -1
  32. camel/models/gemini_model.py +5 -1
  33. camel/models/groq_model.py +5 -1
  34. camel/models/mistral_model.py +5 -1
  35. camel/models/nemotron_model.py +5 -1
  36. camel/models/nvidia_model.py +5 -1
  37. camel/models/openai_model.py +5 -1
  38. camel/models/qwen_model.py +5 -1
  39. camel/models/reka_model.py +5 -1
  40. camel/models/reward/nemotron_model.py +5 -1
  41. camel/models/samba_model.py +5 -1
  42. camel/models/togetherai_model.py +5 -1
  43. camel/models/yi_model.py +5 -1
  44. camel/models/zhipuai_model.py +5 -1
  45. camel/retrievers/auto_retriever.py +8 -0
  46. camel/retrievers/vector_retriever.py +6 -3
  47. camel/schemas/openai_converter.py +5 -1
  48. camel/societies/role_playing.py +4 -4
  49. camel/societies/workforce/workforce.py +2 -2
  50. camel/storages/graph_storages/nebula_graph.py +119 -27
  51. camel/storages/graph_storages/neo4j_graph.py +138 -0
  52. camel/toolkits/__init__.py +2 -0
  53. camel/toolkits/arxiv_toolkit.py +20 -3
  54. camel/toolkits/function_tool.py +61 -61
  55. camel/toolkits/meshy_toolkit.py +5 -1
  56. camel/toolkits/notion_toolkit.py +1 -1
  57. camel/toolkits/openbb_toolkit.py +869 -0
  58. camel/toolkits/search_toolkit.py +91 -5
  59. camel/toolkits/stripe_toolkit.py +5 -1
  60. camel/toolkits/twitter_toolkit.py +24 -16
  61. camel/types/enums.py +7 -1
  62. camel/types/unified_model_type.py +5 -0
  63. camel/utils/__init__.py +4 -0
  64. camel/utils/commons.py +142 -20
  65. {camel_ai-0.2.14.dist-info → camel_ai-0.2.15.dist-info}/METADATA +16 -5
  66. {camel_ai-0.2.14.dist-info → camel_ai-0.2.15.dist-info}/RECORD +68 -55
  67. {camel_ai-0.2.14.dist-info → camel_ai-0.2.15.dist-info}/LICENSE +0 -0
  68. {camel_ai-0.2.14.dist-info → camel_ai-0.2.15.dist-info}/WHEEL +0 -0
@@ -0,0 +1,393 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+ import json
+ import os
+ import random
+ from typing import Any, Dict, List, Optional
+
+ from pydantic import BaseModel, Field
+
+ from camel.agents import ChatAgent
+
+ from .filter import RougeSimilarityFilter
+ from .filter.instruction_filter import InstructionFilter
+ from .templates import SelfInstructTemplates
+
+
+ class SelfInstructPipeline:
+     r"""A pipeline to generate and manage machine-generated instructions for
+     tasks, combining human and machine task samples.
+
+     Args:
+         agent (ChatAgent): The agent used to interact and generate
+             instructions.
+         seed (str): The path to the human-written instructions.
+         num_machine_instructions (int): Number of machine-generated
+             instructions to generate. (default::obj:`5`)
+         data_output_path (Optional[str]): Path to save the generated data.
+             (default::obj:`./data_output.json`)
+         human_to_machine_ratio (tuple): Ratio of human to machine tasks used
+             for instruction generation. (default::obj:`(6, 2)`)
+         instruction_filter (InstructionFilter): A filter to validate
+             generated instructions. (default::obj:`None`)
+         filter_config (Optional[Dict[str, Dict[str, Any]]]): configuration
+             for the filter functions registered in FILE_REGISTRY.
+             (default::obj:`None`)
+     """
+
+     def __init__(
+         self,
+         agent: ChatAgent,
+         seed: str,
+         num_machine_instructions: int = 5,
+         data_output_path: Optional[str] = './data_output.json',
+         human_to_machine_ratio: tuple = (6, 2),
+         instruction_filter: Optional[InstructionFilter] = None,
+         filter_config: Optional[Dict[str, Dict[str, Any]]] = None,
+     ):
+         self.agent = agent
+         self.num_machine_instructions = num_machine_instructions
+         self.data_output_path = data_output_path
+         self.human_to_machine_ratio = human_to_machine_ratio
+         self.human_tasks: List[Dict] = []
+         self.machine_tasks: List[Dict] = []
+         self.load_seed(seed)
+         default_config: Dict[str, Dict[str, Any]] = {
+             "length": {},
+             "keyword": {},
+             "punctuation": {},
+             "non_english": {},
+             "rouge_similarity": {},
+         }
+
+         if instruction_filter is not None:
+             # custom
+             self.instruction_filter = instruction_filter
+         else:
+             # default
+             config_to_use = (
+                 filter_config if filter_config is not None else default_config
+             )
+             self.instruction_filter = InstructionFilter(config_to_use)
+
+     def load_seed(self, path: str):
+         r"""Load seed tasks from a file. Defaults to a predefined seed file if
+         no path is provided.
+
+         Args:
+             path (str): Path to the seed file.
+
+         Raises:
+             FileNotFoundError: If the seed file does not exist.
+         """
+
+         if os.path.exists(path):
+             with open(path, 'r') as f:
+                 for line in f:
+                     line = line.strip()
+                     if line:
+                         self.human_tasks.append(json.loads(line))
+         else:
+             raise FileNotFoundError(f"Seed file not found at path: {path}")
+
+     def sample_human_tasks(self, count: int) -> List[dict]:
+         r"""Sample a specified number of human tasks from the loaded seed.
+
+         Args:
+             count (int): Number of human tasks to sample.
+
+         Returns:
+             List[dict]: A list of sampled human tasks.
+         """
+         return random.sample(
+             self.human_tasks, min(count, len(self.human_tasks))
+         )
+
+     def sample_machine_tasks(self, count: int) -> List[dict]:
+         r"""Sample a specified number of machine tasks.
+
+         Args:
+             count (int): Number of machine tasks to sample.
+
+         Returns:
+             List[dict]: A list of sampled machine tasks, with placeholders if
+                 insufficient tasks are available.
+         """
+         available_machine_tasks = len(self.machine_tasks)
+         if available_machine_tasks < count:
+             sampled_tasks = self.machine_tasks.copy()
+             placeholders_needed = count - available_machine_tasks
+             sampled_tasks.extend(
+                 [{'instruction': ""} for _ in range(placeholders_needed)]
+             )
+             return sampled_tasks
+
+         return random.sample(self.machine_tasks, count)
+
+     def generate_machine_instruction(self) -> str:
+         r"""Generate a machine instruction using the agent.
+
+         Combines human and machine tasks based on the configured ratio to
+         create a prompt for instruction generation.
+
+         Returns:
+             str: A machine-generated instruction.
+         """
+
+         sampled_human_tasks = self.sample_human_tasks(
+             self.human_to_machine_ratio[0]
+         )
+         sampled_machine_tasks = self.sample_machine_tasks(
+             self.human_to_machine_ratio[1]
+         )
+         prompt = "Below are some tasks:\n\n"
+
+         for idx, task in enumerate(sampled_human_tasks, 1):
+             prompt += f"Task {idx}: {task['instruction']}\n"
+
+         current_task_number = len(sampled_human_tasks) + 1
+         for idx, task in enumerate(sampled_machine_tasks, current_task_number):
+             prompt += f"Task {idx}: {task['instruction']}\n"
+
+         task_num = len(sampled_human_tasks) + len(sampled_machine_tasks) + 1
+         prompt += f"Task {task_num}:"
+         prompt += (
+             "\nNow, please produce exactly one new task that fits the "
+             "style of the ones above.\n Do not include any task numbering or "
+             "labels like 'Task X:'. Just write the task itself.\n"
+             "The task should be a single sentence.\n\n"
+         )
+
+         response = self.agent.step(prompt)
+         generated_tasks = [
+             line.strip()
+             for line in response.msgs[0].content.split("\n")
+             if line.strip()
+         ]
+         return generated_tasks[0]
+
+     def identify_instruction(self, instruction: str) -> bool:
+         r"""Determine if the given instruction is a classification task.
+
+         Args:
+             instruction (str): The instruction to classify.
+
+         Returns:
+             bool: True if the instruction is a classification task,
+                 otherwise False.
+         """
+         clf_prompt = (
+             SelfInstructTemplates.clf_template
+             + f"Task: {instruction}\nIs it classification?"
+             + "\nRespond in the following structured format:"
+             "\n{\n \"answer\": true\n}\n"
+             "or\n"
+             "{\n \"answer\": false\n}\n"
+         )
+         response = self.agent.step(clf_prompt)
+         try:
+             structured_response = AgentResponse.parse_raw(
+                 response.msgs[0].content.strip()
+             )
+             return structured_response.answer
+         except ValueError as e:
+             print(f"Error parsing agent response: {e}")
+             return False
+
+     def generate_machine_instances(self):
+         r"""Generate instances for each machine task based on its
+         classification status.
+         """
+         for instruction in self.machine_tasks:
+             instance = self.generate_machine_instance(
+                 instruction['instruction'], instruction['is_classification']
+             )
+             instruction['instances'] = instance
+
+     def generate_machine_instance(
+         self, instruction: str, classification: bool
+     ) -> list[dict]:
+         r"""Generate instances for a given instruction.
+
+         Args:
+             instruction (str): The instruction to create instances for.
+             classification (bool): Whether the instruction is a classification
+                 task.
+
+         Returns:
+             List[dict]: A list of generated instances in input-output format.
+         """
+         if classification:
+             prompt = (
+                 SelfInstructTemplates.output_first_template_for_clf.format(
+                     instruction=instruction
+                 )
+             )
+         else:
+             prompt = SelfInstructTemplates.input_first_template_for_gen.format(
+                 instruction=instruction
+             )
+
+         response = self.agent.step(prompt)
+         generated_text = response.msgs[0].content.strip()
+
+         if classification:
+             return self.parse_classification_output(generated_text)
+         else:
+             return self.parse_non_classification_output(generated_text)
+
+     def parse_classification_output(
+         self, generated_text: str
+     ) -> List[Dict[str, str]]:
+         r"""Parse the generated text for classification tasks into input-output
+         pairs.
+
+         Args:
+             generated_text (str): The raw text generated by the agent for
+                 classification tasks.
+
+         Returns:
+             List[Dict[str, str]]: A list of dictionaries with 'input' and
+                 'output' keys.
+         """
+         instances = []
+         lines = generated_text.split("\n")
+         current_label = None
+         current_input = None
+
+         for line in lines:
+             line = line.strip()
+             if not line:
+                 continue
+
+             if line.startswith("Class label:"):
+                 if current_label and current_input:
+                     instances.append(
+                         {
+                             "input": current_input.strip(),
+                             "output": current_label.strip(),
+                         }
+                     )
+
+                 current_label = line[len("Class label:") :].strip()
+                 current_input = None
+             else:
+                 if current_input is None:
+                     current_input = line
+                 else:
+                     current_input += f"\n{line}"
+         if current_label and current_input:
+             instances.append(
+                 {
+                     "input": current_input.strip(),
+                     "output": current_label.strip(),
+                 }
+             )
+
+         return instances
+
+     def parse_non_classification_output(
+         self, generated_text: str
+     ) -> List[Dict[str, str]]:
+         r"""Parse the generated text for non-classification tasks into
+         input-output pairs.
+
+         Args:
+             generated_text (str): The raw text generated by the agent for
+                 non-classification tasks.
+
+         Returns:
+             List[Dict[str, str]]: A list of dictionaries with 'input' and
+                 'output' keys.
+         """
+         instances = []
+         prev = 0
+         lines = generated_text.split("\n")
+         i = 0
+
+         while i < len(lines):
+             line = lines[i].strip()
+
+             if line.startswith("Example "):
+                 prev = i + 1
+
+             elif line.startswith("Output:"):
+                 instance_input = '\n'.join(lines[prev:i]).strip()
+                 if instance_input.startswith("Input: "):
+                     instance_input = instance_input[len("Input: ") :].strip()
+                 else:
+                     instance_input = instance_input.strip()
+
+                 instance_output = line[len("Output:") :].strip()
+                 i += 1
+                 while i < len(lines) and not lines[i].strip().startswith(
+                     "Example "
+                 ):
+                     instance_output += '\n' + lines[i].strip()
+                     i += 1
+                 i -= 1
+
+                 instance_output = instance_output.strip()
+
+                 instances.append(
+                     {"input": instance_input, "output": instance_output}
+                 )
+
+                 prev = i + 1
+             i += 1
+
+         if not instances:
+             instances.append({"input": "", "output": "No valid output found."})
+
+         return instances
+
+     def construct_data(self):
+         r"""Save the machine-generated tasks to the specified output path
+         in JSON format.
+         """
+         with open(self.data_output_path, 'w') as f:
+             json.dump(self.machine_tasks, f, indent=4)
+
+     def generate(self):
+         r"""Execute the entire pipeline to generate machine instructions
+         and instances.
+         """
+         while len(self.machine_tasks) < self.num_machine_instructions:
+             existing_instructions = [
+                 t["instruction"] for t in self.human_tasks
+             ] + [t["instruction"] for t in self.machine_tasks]
+             for f in self.instruction_filter.filters:
+                 if isinstance(f, RougeSimilarityFilter):
+                     f.existing_instructions = existing_instructions
+             instruction = self.generate_machine_instruction()
+             if self.instruction_filter.filter(instruction):
+                 instruction_dict = {
+                     "id": f"machine_task_{len(self.machine_tasks) + 1}",
+                     "instruction": instruction,
+                     "is_classification": self.identify_instruction(
+                         instruction
+                     ),
+                 }
+                 self.machine_tasks.append(instruction_dict)
+         self.generate_machine_instances()
+         self.construct_data()
+
+
+ class AgentResponse(BaseModel):
+     answer: bool = Field(
+         ...,
+         description="Indicates whether the task is "
+         "classification (True/False).",
+     )
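
For context, below is a minimal usage sketch of the SelfInstructPipeline added above (the diffed file is camel/datagen/self_instruct/self_instruct.py, item 15 in the file list). The sketch is not taken from the package: the seed path, the seed contents, and the default-constructed ChatAgent are illustrative assumptions, and the import assumes the new camel.datagen.self_instruct __init__.py (item 10) re-exports the class. The constructor arguments and the generate() call mirror the code shown in the diff.

# Minimal usage sketch (not from the package); see assumptions above.
import json

from camel.agents import ChatAgent
from camel.datagen.self_instruct import SelfInstructPipeline

# Hypothetical seed file: one JSON object per line with an "instruction" key,
# which is the format load_seed() and sample_human_tasks() expect.
seed_tasks = [
    {"instruction": "Summarize the following article in two sentences."},
    {"instruction": "Classify the sentiment of a product review."},
]
with open("seed_tasks.jsonl", "w") as f:
    for task in seed_tasks:
        f.write(json.dumps(task) + "\n")

pipeline = SelfInstructPipeline(
    agent=ChatAgent(),  # assumes a default agent with model credentials set
    seed="seed_tasks.jsonl",
    num_machine_instructions=5,
    data_output_path="./data_output.json",
    human_to_machine_ratio=(6, 2),
    # Omitting instruction_filter and filter_config applies the default set:
    # length, keyword, punctuation, non_english, rouge_similarity.
)

# Generates instructions until num_machine_instructions pass the filters,
# labels each as classification or not, builds instances, and writes the
# results to data_output.json.
pipeline.generate()

The resulting JSON is a list of task dicts, each carrying "id", "instruction", "is_classification", and an "instances" list of {"input", "output"} pairs produced by the parsing helpers above.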