camel-ai 0.2.15a0__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. See the registry advisory for more details.

Files changed (61)
  1. camel/__init__.py +1 -1
  2. camel/benchmarks/__init__.py +11 -1
  3. camel/benchmarks/apibank.py +560 -0
  4. camel/benchmarks/apibench.py +496 -0
  5. camel/benchmarks/gaia.py +2 -2
  6. camel/benchmarks/nexus.py +518 -0
  7. camel/datagen/__init__.py +6 -2
  8. camel/datagen/{o1datagen.py → cotdatagen.py} +19 -6
  9. camel/datagen/self_instruct/__init__.py +36 -0
  10. camel/datagen/self_instruct/filter/__init__.py +34 -0
  11. camel/datagen/self_instruct/filter/filter_function.py +216 -0
  12. camel/datagen/self_instruct/filter/filter_registry.py +56 -0
  13. camel/datagen/self_instruct/filter/instruction_filter.py +81 -0
  14. camel/datagen/self_instruct/self_instruct.py +393 -0
  15. camel/datagen/self_instruct/templates.py +384 -0
  16. camel/datahubs/huggingface.py +12 -2
  17. camel/datahubs/models.py +2 -3
  18. camel/embeddings/mistral_embedding.py +5 -1
  19. camel/embeddings/openai_compatible_embedding.py +6 -1
  20. camel/embeddings/openai_embedding.py +5 -1
  21. camel/interpreters/e2b_interpreter.py +5 -1
  22. camel/loaders/apify_reader.py +5 -1
  23. camel/loaders/chunkr_reader.py +5 -1
  24. camel/loaders/firecrawl_reader.py +0 -30
  25. camel/logger.py +11 -5
  26. camel/models/anthropic_model.py +5 -1
  27. camel/models/azure_openai_model.py +1 -2
  28. camel/models/cohere_model.py +5 -1
  29. camel/models/deepseek_model.py +5 -1
  30. camel/models/gemini_model.py +5 -1
  31. camel/models/groq_model.py +5 -1
  32. camel/models/mistral_model.py +5 -1
  33. camel/models/nemotron_model.py +5 -1
  34. camel/models/nvidia_model.py +5 -1
  35. camel/models/openai_model.py +5 -1
  36. camel/models/qwen_model.py +5 -1
  37. camel/models/reka_model.py +5 -1
  38. camel/models/reward/nemotron_model.py +5 -1
  39. camel/models/samba_model.py +5 -1
  40. camel/models/togetherai_model.py +5 -1
  41. camel/models/yi_model.py +5 -1
  42. camel/models/zhipuai_model.py +5 -1
  43. camel/schemas/openai_converter.py +5 -1
  44. camel/storages/graph_storages/nebula_graph.py +89 -20
  45. camel/storages/graph_storages/neo4j_graph.py +138 -0
  46. camel/toolkits/__init__.py +4 -0
  47. camel/toolkits/arxiv_toolkit.py +20 -3
  48. camel/toolkits/dappier_toolkit.py +196 -0
  49. camel/toolkits/function_tool.py +61 -61
  50. camel/toolkits/meshy_toolkit.py +5 -1
  51. camel/toolkits/notion_toolkit.py +1 -1
  52. camel/toolkits/openbb_toolkit.py +869 -0
  53. camel/toolkits/search_toolkit.py +91 -5
  54. camel/toolkits/stripe_toolkit.py +5 -1
  55. camel/toolkits/twitter_toolkit.py +24 -16
  56. camel/utils/__init__.py +2 -0
  57. camel/utils/commons.py +104 -19
  58. {camel_ai-0.2.15a0.dist-info → camel_ai-0.2.16.dist-info}/METADATA +16 -4
  59. {camel_ai-0.2.15a0.dist-info → camel_ai-0.2.16.dist-info}/RECORD +61 -49
  60. {camel_ai-0.2.15a0.dist-info → camel_ai-0.2.16.dist-info}/LICENSE +0 -0
  61. {camel_ai-0.2.15a0.dist-info → camel_ai-0.2.16.dist-info}/WHEEL +0 -0
@@ -0,0 +1,496 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import json
16
+ import logging
17
+ import random
18
+ from pathlib import Path
19
+ from typing import Any, Dict, Literal, Optional
20
+
21
+ import tree_sitter_python as tspython
22
+ from tqdm import tqdm
23
+ from tree_sitter import Language, Parser
24
+
25
+ from camel.agents import ChatAgent
26
+ from camel.benchmarks.base import BaseBenchmark
27
+ from camel.messages import BaseMessage
28
+ from camel.utils import download_github_subdirectory
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
# Per-dataset file names: API specs ("api"), evaluation pairs ("eval"),
# training data ("train"), and 0-shot question files ("questions").
dataset_mapping = {
    "huggingface": {
        "api": "huggingface_api.jsonl",
        "eval": "huggingface_eval.json",
        "train": "huggingface_train.json",
        "questions": "questions_huggingface_0_shot.jsonl",
    },
    "tensorflowhub": {
        "api": "tensorflowhub_api.jsonl",
        "eval": "tensorflow_eval.json",
        "train": "tensorflow_train.json",
        "questions": "questions_tensorflowhub_0_shot.jsonl",
    },
    "torchhub": {
        "api": "torchhub_api.jsonl",
        "eval": "torchhub_eval.json",
        "train": "torchhub_train.json",
        "questions": "questions_torchhub_0_shot.jsonl",
    },
}
52
+
53
+
54
# This function is migrated from the original repo:
# https://github.com/ShishirPatil/gorilla
def encode_question(question: str, dataset_name: str) -> str:
    r"""Encode multiple prompt instructions into a single string.

    Args:
        question (str): The raw benchmark question text.
        dataset_name (str): One of ``"huggingface"``, ``"tensorflowhub"``
            or ``"torchhub"``; selects the domain list embedded in the
            prompt.

    Returns:
        str: The fully formatted prompt for the agent.

    Raises:
        ValueError: If ``dataset_name`` is not a supported dataset.
    """
    # NOTE: upstream typos in the domain lists ("Zero-Shor",
    # "image-enhancemenmt") are preserved so prompts stay comparable
    # with the original Gorilla benchmark.
    if dataset_name == "torchhub":
        domains = (
            "1. $DOMAIN is inferred from the task description and "
            "should include one of {Classification, Semantic Segmentation, "
            "Object Detection, Audio Separation, Video Classification, "
            "Text-to-Speech}."
        )
    elif dataset_name == "huggingface":
        domains = (
            "1. $DOMAIN should include one of {Multimodal Feature "
            "Extraction, Multimodal Text-to-Image, Multimodal "
            "Image-to-Text, Multimodal Text-to-Video, "
            "Multimodal Visual Question Answering, Multimodal Document "
            "Question Answer, Multimodal Graph Machine Learning, "
            "Computer Vision Depth Estimation, Computer Vision Image "
            "Classification, Computer Vision Object Detection, "
            "Computer Vision Image Segmentation, Computer Vision "
            "Image-to-Image, Computer Vision Unconditional "
            "Image Generation, Computer Vision Video Classification, "
            "Computer Vision Zero-Shor Image Classification, "
            "Natural Language Processing Text Classification, "
            "Natural Language Processing Token Classification, "
            "Natural Language Processing Table Question Answering, "
            "Natural Language Processing Question Answering, "
            "Natural Language Processing, Zero-Shot Classification "
            "Natural Language Processing Translation, Natural Language "
            "Processing Summarization, Natural Language Processing "
            "Conversational, Natural Language Processing Text "
            "Generation, Natural Language Processing Fill-Mask, "
            "Natural Language Processing Text2Text Generation, "
            "Natural Language Processing Sentence Similarity, "
            "Audio Text-to-Speech, Audio Automatic Speech Recognition, "
            "Audio Audio-to-Audio, Audio Audio Classification, "
            "Audio Voice Activity Detection, Tabular Tabular "
            "Classification, Tabular Tabular Regression, "
            "Reinforcement Learning Reinforcement Learning, "
            "Reinforcement Learning Robotics }"
        )
    elif dataset_name == "tensorflowhub":
        domains = (
            "1. $DOMAIN is inferred from the task description "
            "and should include one of {text-sequence-alignment, "
            "text-embedding, text-language-model, text-preprocessing, "
            "text-classification, text-generation, text-question-answering, "
            "text-retrieval-question-answering, text-segmentation, "
            "text-to-mel, image-classification, image-feature-vector, "
            "image-object-detection, image-segmentation, "
            "image-generator, image-pose-detection, image-rnn-agent, "
            "image-augmentation, image-classifier, image-style-transfer, "
            "image-aesthetic-quality, image-depth-estimation, "
            "image-super-resolution, image-deblurring, image-extrapolation, "
            "image-text-recognition, image-dehazing, image-deraining, "
            "image-enhancemenmt, image-classification-logits, "
            "image-frame-interpolation, image-text-detection, "
            "image-denoising, image-others, video-classification, "
            "video-feature-extraction, video-generation, video-audio-text, "
            "video-text, audio-embedding, audio-event-classification, "
            "audio-command-detection, audio-paralinguists-classification, "
            "audio-speech-to-text, audio-speech-synthesis, audio-synthesis, "
            "audio-pitch-extraction}"
        )
    else:
        # Bug fix: the original only logged here and fell through,
        # crashing below with a NameError on `domains`. Fail fast with
        # a descriptive exception instead.
        raise ValueError(f"Dataset name is not supported: {dataset_name}")

    prompt = (
        question
        + "\nWrite a python program in 1 to 2 lines to call API in "
        + dataset_name
        + ".\n\nThe answer should follow the format: <<<domain>>> $DOMAIN, "
        "<<<api_call>>>: $API_CALL, <<<api_provider>>>: $API_PROVIDER, "
        "<<<explanation>>>: $EXPLANATION, <<<code>>>: $CODE}. "
        "Here are the requirements:\n"
        + domains
        + "\n2. The $API_CALL should have only 1 line of code "
        "that calls api.\n 3. The $API_PROVIDER should be the "
        "programming framework used.\n4. $EXPLANATION should be "
        "a step-by-step explanation.\n5. The $CODE is the python code.\n6. "
        "Do not repeat the format in your answer."
    )
    return prompt
132
+
133
+
134
class APIBenchBenchmark(BaseBenchmark):
    r"""APIBench Benchmark adopted from `Gorilla: Large Language Model
    Connected with Massive APIs`
    <https://huggingface.co/datasets/gorilla-llm/APIBench>.

    Args:
        data_dir (str): The directory to save the data.
        save_to (str): The file to save the results.
        processes (int, optional): The number of processes to use.
            (default: :obj:`1`)
    """

    # TODO: Integrate retriever (pending)

    def __init__(
        self,
        data_dir: str,
        save_to: str,
        processes: int = 1,
    ):
        r"""Initialize the APIBench benchmark.

        Args:
            data_dir (str): The directory to save the data.
            save_to (str): The file to save the results.
            processes (int, optional): The number of processes to use for
                parallel processing. (default: :obj:`1`)
        """
        super().__init__("apibench", data_dir, save_to, processes)

    def download(self):
        r"""Download the APIBench dataset.

        Fetches the HuggingFace dataset snapshot into ``self.data_dir``
        and the 0-shot question files from the Gorilla GitHub repo.
        """
        # Imported lazily so huggingface_hub is only required on download.
        from huggingface_hub import snapshot_download

        snapshot_download(
            repo_id="gorilla-llm/APIBench",
            repo_type="dataset",
            local_dir=self.data_dir,
            local_dir_use_symlinks=True,
        )

        # The question files live in the Gorilla eval tree, not in the
        # HuggingFace dataset, so they are fetched separately.
        download_github_subdirectory(
            "ShishirPatil/gorilla",
            "eval/eval-data/questions",
            self.data_dir,
        )

    def load(self, dataset_name: str, force_download: bool = False):  # type: ignore[override]
        r"""Load the APIBench Benchmark dataset.

        Args:
            dataset_name (str): Name of the specific dataset to be loaded.
            force_download (bool, optional): Whether to force
                download the data. (default: :obj:`False`)
        """
        if force_download:
            logger.info("Force downloading data.")
            self.download()

        def _read_json_lines(path: Path):
            r"""Read a JSON-lines file, raising descriptive errors."""
            try:
                with open(path, "r") as fh:
                    return [json.loads(row) for row in fh]
            except FileNotFoundError:
                raise FileNotFoundError(f"File not found: {path}")
            except json.JSONDecodeError as err:
                raise ValueError(f"Error decoding JSON in file {path}: {err}")

        dataset_path = self.data_dir / dataset_name
        if not dataset_path.exists():
            raise FileNotFoundError(
                f"Dataset directory does not exist: {dataset_path}"
            )

        for label in ('api', 'eval', 'questions'):
            file_name = dataset_mapping[dataset_name][label]
            # Question files sit in the per-dataset subdirectory; api and
            # eval files sit directly under the data directory.
            if label == 'questions':
                file_path = dataset_path / file_name
            else:
                file_path = self.data_dir / file_name

            records = _read_json_lines(file_path)
            if label == 'eval':
                # Eval entries wrap the payload in an 'api_data' field.
                records = [entry['api_data'] for entry in records]
            self._data[label] = records

        # Pre-parse every reference api_call into a tree-sitter AST so
        # evaluation can match candidate subtrees against them.
        self._data['ast'] = [
            ast_parse(entry['api_call']) for entry in self._data['api']
        ]

    def run(  # type: ignore[override]
        self,
        agent: ChatAgent,
        dataset_name: Literal["huggingface", "tensorflowhub", "torchhub"],
        randomize: bool = False,
        subset: Optional[int] = None,
    ) -> Dict[str, Any]:
        r"""Run the benchmark.

        Args:
            agent (ChatAgent): The agent to run the benchmark.
            dataset_name (Literal["huggingface", "tensorflowhub",
                "torchhub"]): The dataset to run the benchmark.
            randomize (bool, optional): Whether to randomize the data.
                (default: :obj:`False`)
            subset (Optional[int], optional): The subset of data to run.
                (default: :obj:`None`)

        Returns:
            Dict[str, Any]: Totals, correct/hallucination counts, accuracy
            and hallucination rate ("N/A" when no task was run).
        """
        if dataset_name not in dataset_mapping:
            raise ValueError(f"Invalid value for dataset: {dataset_name}.")

        logger.info(f"Running APIBench benchmark on {dataset_name}.")
        self.load(dataset_name)
        questions = self._data['questions']

        # Optionally shuffle and/or truncate the task list.
        if randomize:
            random.shuffle(questions)
        if subset:
            questions = questions[:subset]

        logger.info(f"Number of tasks: {len(questions)}")

        self._results = []

        with open(self.save_to, "w") as out_file:
            for question in tqdm(questions, desc="Running"):
                prompt = encode_question(question["text"], dataset_name)
                msg = BaseMessage.make_user_message(
                    role_name="User", content=prompt
                )
                try:
                    response = agent.step(msg).msgs[0].content
                    error, correct, hallucination = evaluate_response(
                        response,
                        question['question_id'],
                        dataset_name,
                        self._data['api'],
                        self._data['eval'],
                        self._data['ast'],
                    )
                    self._results.append(
                        {
                            "question": question,
                            "agent_response": response,
                            "correct": correct,
                            "hallucination": hallucination,
                            "error": str(error) if error else None,
                        }
                    )
                except Exception as e:
                    logger.warning(f"Error in processing task: {question}: {e}")
                    self._results.append(
                        {
                            "question": question,
                            "agent_response": None,
                            "correct": False,
                            "hallucination": False,
                            "error": str(e),
                        }
                    )

                # Reset conversation state so tasks stay independent.
                agent.reset()

                out_file.write(json.dumps(self._results[-1], indent=2) + "\n")
                out_file.flush()

        total = len(self._results)
        correct = sum(r["correct"] for r in self.results)
        hallucination = sum(r["hallucination"] for r in self.results)

        return {
            "total": total,
            "correct": correct,
            "hallucination": hallucination,
            "accuracy": correct / total if total else "N/A",
            "hallucination rate": hallucination / total if total else "N/A",
        }
340
+
341
+
342
# This code is modified from the
# evaluators in the original repo
# https://github.com/ShishirPatil/gorilla
# Get all the subtrees given a root_node
def get_all_sub_trees(root_node):
    r"""Collect every internal subtree reachable from ``root_node``.

    Walks the tree depth-first and records, for each visited node, a list
    ``[s-expression, depth, node, first_child_text]`` where the last entry
    is ``None`` for nodes without children. Only children that themselves
    have children are descended into.
    """
    collected = []
    stack = [(root_node, 1)]
    while stack:
        node, node_depth = stack.pop()
        if node.child_count > 0:
            first_child_text = node.children[0].text
        else:
            first_child_text = None
        collected.append([str(node), node_depth, node, first_child_text])
        for child in node.children:
            # Leaves contribute no subtree of their own.
            if len(child.children) != 0:
                stack.append((child, node_depth + 1))
    return collected
372
+
373
+
374
# Parse the program into AST trees
def ast_parse(candidate):
    r"""Parse a Python source snippet with tree-sitter.

    Returns the root node of the parsed syntax tree.
    """
    py_language = Language(tspython.language())
    tree = Parser(py_language).parse(bytes(candidate, "utf8"))
    return tree.root_node
381
+
382
+
383
# Get all the arguments in the ast tree
def get_args(node, dataset_name):
    r"""Extract argument byte-strings from a parsed API call.

    The argument list is expected at
    ``node.children[0].children[0].children[1].children``. Keyword
    arguments contribute their value node's text; for huggingface and
    tensorflowhub, bare positional arguments (anything that is not a
    parenthesis or comma) contribute their own text. For torchhub only
    ``repo_or_dir``/``model`` keyword values are collected.
    """
    if node.child_count == 0:
        return []
    collected = []
    if dataset_name not in ("huggingface", "tensorflowhub", "torchhub"):
        # Unknown dataset: mirror the original's silent empty result.
        return collected

    arg_nodes = node.children[0].children[0].children[1].children
    punctuation = ("(", ")", ",")

    if dataset_name == "huggingface":
        for arg in arg_nodes:
            text = arg.text.decode()
            if "=" in text:
                collected.append(arg.children[2].text)
            elif text not in punctuation:
                collected.append(arg.text)
    elif dataset_name == "tensorflowhub":
        for arg in arg_nodes:
            text = arg.text.decode()
            if 'model=' in text or 'model =' in text:
                collected.append(arg.children[2].text)
            elif text not in punctuation:
                collected.append(arg.text)
    else:  # torchhub
        for arg in arg_nodes:
            text = arg.text.decode()
            if "repo_or_dir" in text or "model" in text:
                collected.append(arg.children[2].text)
    return collected
419
+
420
+
421
# Check if there is an api match
def ast_check(candidate_subtree_list, base_tree_list, dataset_name):
    r"""Return the index of the first reference tree matched by a candidate.

    For each reference tree, locate the candidate subtree whose recorded
    first-child text equals the reference API name, then require every
    reference argument (quotes stripped) to appear in that candidate's
    source text. Returns ``-1`` when no reference matches.
    """
    for index, base_tree in enumerate(base_tree_list):
        if base_tree.children[0].children[0].child_count == 0:
            continue
        api_name = base_tree.children[0].children[0].children[0].text
        for entry in candidate_subtree_list:
            if entry[3] == api_name:
                break
        # NOTE(review): upstream quirk preserved — when no entry matches,
        # the loop falls through with the last entry still bound.
        candidate_node = entry[2]
        reference_args = get_args(base_tree, dataset_name)
        if len(reference_args) == 0:
            continue
        candidate_text = candidate_node.text.decode()
        matched = all(
            arg.decode().lstrip("'").rstrip("'") in candidate_text
            for arg in reference_args
        )
        if matched:
            return index
    return -1
446
+
447
+
448
def evaluate_response(
    response, question_id, dataset_name, api_database, qa_pairs, ast_database
):
    r"""Evaluate an agent response against the reference API database.

    Args:
        response (str): The agent's raw text answer.
        question_id (int): 1-based question index into ``qa_pairs``.
        dataset_name (str): Dataset the question belongs to.
        api_database (list): Reference API records (with a 'domain' key).
        qa_pairs (list): Reference QA records (with a 'domain' key).
        ast_database (list): Pre-parsed ASTs of the reference api_calls.

    Returns:
        tuple: ``(error, correct, hallucination)`` where ``error`` is the
        exception raised during evaluation or ``None``.
    """
    try:
        # Extract the api_call section from the formatted response.
        output = response.split("api_call")
        if len(output) == 1:
            api_call = output[0]
        else:
            output = output[1].split("api_provider")[0]
            start = output.index(":") if ":" in output else 0
            end = output.rindex(")") if ")" in output else -2
            api_call = output[start + 2 : end + 1]

        try:
            ast_tree = ast_parse(api_call)
        except Exception as parse_error:
            print(f"Error parsing api_call: {api_call}, error: {parse_error}")
            return parse_error, False, False

        # Search for a matching subtree in the reference AST database.
        ast_subtree_list = get_all_sub_trees(ast_tree)
        database_index = ast_check(
            ast_subtree_list, ast_database, dataset_name
        )
        if database_index == -1:
            # Bug fix: the original fell through here and indexed
            # api_database[-1], so a hallucinated call could be marked
            # correct if the *last* database entry's domain happened to
            # match. Return immediately as a hallucination instead.
            return None, False, True

        # Check functionality: the matched API must be in the same
        # domain as the reference answer for this question.
        ref_api_call = api_database[database_index]
        if ref_api_call['domain'] == qa_pairs[question_id - 1]['domain']:
            return None, True, False
        return None, False, False
    except Exception as e:
        print(f'Error parsing response: {response}, error: {e}')
        return e, False, False
camel/benchmarks/gaia.py CHANGED
@@ -25,8 +25,8 @@ from typing import Any, Dict, List, Literal, Optional, Protocol, Union
25
25
  from tqdm import tqdm
26
26
 
27
27
  from camel.agents import ChatAgent
28
- from camel.benchmarks import BaseBenchmark
29
- from camel.messages.base import BaseMessage
28
+ from camel.benchmarks.base import BaseBenchmark
29
+ from camel.messages import BaseMessage
30
30
  from camel.retrievers.auto_retriever import AutoRetriever
31
31
 
32
32
  logger = logging.getLogger(__name__)