camel-ai 0.2.35__py3-none-any.whl → 0.2.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic; see the registry's security advisory for details.

Files changed (59)
  1. camel/__init__.py +1 -1
  2. camel/agents/__init__.py +2 -0
  3. camel/agents/repo_agent.py +579 -0
  4. camel/configs/__init__.py +3 -0
  5. camel/configs/aiml_config.py +20 -19
  6. camel/configs/anthropic_config.py +25 -27
  7. camel/configs/cohere_config.py +11 -10
  8. camel/configs/deepseek_config.py +16 -16
  9. camel/configs/gemini_config.py +8 -8
  10. camel/configs/groq_config.py +18 -19
  11. camel/configs/internlm_config.py +8 -8
  12. camel/configs/litellm_config.py +26 -24
  13. camel/configs/mistral_config.py +8 -8
  14. camel/configs/moonshot_config.py +11 -11
  15. camel/configs/nvidia_config.py +13 -13
  16. camel/configs/ollama_config.py +14 -15
  17. camel/configs/openai_config.py +3 -3
  18. camel/configs/openrouter_config.py +106 -0
  19. camel/configs/qwen_config.py +8 -8
  20. camel/configs/reka_config.py +12 -11
  21. camel/configs/samba_config.py +14 -14
  22. camel/configs/sglang_config.py +15 -16
  23. camel/configs/siliconflow_config.py +18 -17
  24. camel/configs/togetherai_config.py +18 -19
  25. camel/configs/vllm_config.py +18 -19
  26. camel/configs/yi_config.py +7 -8
  27. camel/configs/zhipuai_config.py +8 -9
  28. camel/datasets/few_shot_generator.py +2 -5
  29. camel/datasets/static_dataset.py +25 -23
  30. camel/environments/models.py +3 -0
  31. camel/environments/single_step.py +212 -132
  32. camel/extractors/__init__.py +16 -1
  33. camel/memories/agent_memories.py +2 -1
  34. camel/memories/blocks/chat_history_block.py +2 -1
  35. camel/models/__init__.py +2 -0
  36. camel/models/gemini_model.py +36 -0
  37. camel/models/groq_model.py +6 -3
  38. camel/models/model_factory.py +3 -0
  39. camel/models/openrouter_model.py +204 -0
  40. camel/storages/__init__.py +2 -0
  41. camel/storages/key_value_storages/__init__.py +2 -0
  42. camel/storages/key_value_storages/mem0_cloud.py +224 -0
  43. camel/storages/vectordb_storages/qdrant.py +3 -3
  44. camel/toolkits/__init__.py +2 -0
  45. camel/toolkits/browser_toolkit.py +43 -0
  46. camel/toolkits/code_execution.py +2 -1
  47. camel/toolkits/mcp_toolkit.py +30 -1
  48. camel/toolkits/thinking_toolkit.py +74 -0
  49. camel/types/enums.py +27 -0
  50. camel/types/unified_model_type.py +5 -0
  51. camel/utils/chunker/code_chunker.py +9 -15
  52. camel/verifiers/__init__.py +1 -2
  53. camel/verifiers/base.py +159 -99
  54. camel/verifiers/models.py +0 -12
  55. camel/verifiers/python_verifier.py +316 -60
  56. {camel_ai-0.2.35.dist-info → camel_ai-0.2.37.dist-info}/METADATA +54 -5
  57. {camel_ai-0.2.35.dist-info → camel_ai-0.2.37.dist-info}/RECORD +59 -54
  58. {camel_ai-0.2.35.dist-info → camel_ai-0.2.37.dist-info}/WHEEL +0 -0
  59. {camel_ai-0.2.35.dist-info → camel_ai-0.2.37.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,74 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ from typing import List, Optional
16
+
17
+ from camel.logger import get_logger
18
+ from camel.toolkits import FunctionTool
19
+ from camel.toolkits.base import BaseToolkit
20
+
21
+ logger = get_logger(__name__)
22
+
23
+
24
+ class ThinkingToolkit(BaseToolkit):
25
+ r"""A toolkit for recording thoughts during reasoning processes.
26
+
27
+ Attributes:
28
+ thoughts (List[str]): A list to store the recorded thoughts.
29
+ """
30
+
31
+ def __init__(
32
+ self,
33
+ timeout: Optional[float] = None,
34
+ ):
35
+ r"""Initialize the ThinkingToolkit.
36
+
37
+ Args:
38
+ timeout (Optional[float]): The timeout for the toolkit.
39
+ (default: :obj: `None`)
40
+ """
41
+ super().__init__(timeout=timeout)
42
+ self.thoughts: List[str] = []
43
+
44
+ def think(self, thought: str) -> str:
45
+ r"""Use the tool to think about something.
46
+ It will not obtain new information or change the database, but just
47
+ append the thought to the log. Use it when complex reasoning or some
48
+ cache memory is needed.
49
+
50
+ Args:
51
+ thought (str): A thought to think about.
52
+
53
+ Returns:
54
+ str: The full log of thoughts including the new thought.
55
+ """
56
+ try:
57
+ logger.debug(f"Thought: {thought}")
58
+ self.thoughts.append(thought)
59
+
60
+ thoughts = "\n".join([f"- {t}" for t in self.thoughts])
61
+ return f"Thoughts:\n{thoughts}"
62
+
63
+ except Exception as e:
64
+ error_msg = f"Error recording thought: {e}"
65
+ logger.error(error_msg)
66
+ return error_msg
67
+
68
+ def get_tools(self) -> List[FunctionTool]:
69
+ r"""Get all tools in the toolkit.
70
+
71
+ Returns:
72
+ List[FunctionTool]: A list of tools.
73
+ """
74
+ return [FunctionTool(self.think)]
camel/types/enums.py CHANGED
@@ -63,6 +63,11 @@ class ModelType(UnifiedModelType, Enum):
63
63
  GROQ_MIXTRAL_8_7B = "mixtral-8x7b-32768"
64
64
  GROQ_GEMMA_2_9B_IT = "gemma2-9b-it"
65
65
 
66
+ # OpenRouter models
67
+ OPENROUTER_LLAMA_3_1_405B = "meta-llama/llama-3.3-405b-instruct"
68
+ OPENROUTER_LLAMA_3_1_70B = "meta-llama/llama-3.3-70b-instruct"
69
+ OPENROUTER_OLYMPICODER_7B = "open-r1/olympiccoder-7b:free"
70
+
66
71
  # TogetherAI platform models support tool calling
67
72
  TOGETHER_LLAMA_3_1_8B = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
68
73
  TOGETHER_LLAMA_3_1_70B = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
@@ -118,6 +123,7 @@ class ModelType(UnifiedModelType, Enum):
118
123
  NVIDIA_LLAMA3_3_70B_INSTRUCT = "meta/llama-3.3-70b-instruct"
119
124
 
120
125
  # Gemini models
126
+ GEMINI_2_5_PRO_EXP = "gemini-2.5-pro-exp-03-25"
121
127
  GEMINI_2_0_FLASH = "gemini-2.0-flash-exp"
122
128
  GEMINI_2_0_FLASH_THINKING = "gemini-2.0-flash-thinking-exp"
123
129
  GEMINI_2_0_PRO_EXP = "gemini-2.0-pro-exp-02-05"
@@ -253,6 +259,7 @@ class ModelType(UnifiedModelType, Enum):
253
259
  self.is_together,
254
260
  self.is_sambanova,
255
261
  self.is_groq,
262
+ self.is_openrouter,
256
263
  self.is_sglang,
257
264
  self.is_moonshot,
258
265
  self.is_siliconflow,
@@ -342,6 +349,15 @@ class ModelType(UnifiedModelType, Enum):
342
349
  ModelType.GROQ_GEMMA_2_9B_IT,
343
350
  }
344
351
 
352
+ @property
353
+ def is_openrouter(self) -> bool:
354
+ r"""Returns whether this type of models is served by OpenRouter."""
355
+ return self in {
356
+ ModelType.OPENROUTER_LLAMA_3_1_405B,
357
+ ModelType.OPENROUTER_LLAMA_3_1_70B,
358
+ ModelType.OPENROUTER_OLYMPICODER_7B,
359
+ }
360
+
345
361
  @property
346
362
  def is_together(self) -> bool:
347
363
  r"""Returns whether this type of models is served by Together AI."""
@@ -405,6 +421,7 @@ class ModelType(UnifiedModelType, Enum):
405
421
  bool: Whether this type of models is gemini.
406
422
  """
407
423
  return self in {
424
+ ModelType.GEMINI_2_5_PRO_EXP,
408
425
  ModelType.GEMINI_2_0_FLASH,
409
426
  ModelType.GEMINI_1_5_FLASH,
410
427
  ModelType.GEMINI_1_5_PRO,
@@ -580,6 +597,7 @@ class ModelType(UnifiedModelType, Enum):
580
597
  ModelType.MOONSHOT_V1_8K,
581
598
  ModelType.GLM_4V_FLASH,
582
599
  ModelType.GLM_4_AIRX,
600
+ ModelType.OPENROUTER_OLYMPICODER_7B,
583
601
  }:
584
602
  return 8_192
585
603
  elif self in {
@@ -686,6 +704,8 @@ class ModelType(UnifiedModelType, Enum):
686
704
  ModelType.SGLANG_QWEN_2_5_7B,
687
705
  ModelType.SGLANG_QWEN_2_5_32B,
688
706
  ModelType.SGLANG_QWEN_2_5_72B,
707
+ ModelType.OPENROUTER_LLAMA_3_1_70B,
708
+ ModelType.OPENROUTER_LLAMA_3_1_405B,
689
709
  }:
690
710
  return 131_072
691
711
  elif self in {
@@ -706,6 +726,7 @@ class ModelType(UnifiedModelType, Enum):
706
726
  }:
707
727
  return 256_000
708
728
  elif self in {
729
+ ModelType.GEMINI_2_5_PRO_EXP,
709
730
  ModelType.GEMINI_2_0_FLASH,
710
731
  ModelType.GEMINI_1_5_FLASH,
711
732
  ModelType.GEMINI_1_5_PRO,
@@ -881,6 +902,7 @@ class ModelPlatformType(Enum):
881
902
  AZURE = "azure"
882
903
  ANTHROPIC = "anthropic"
883
904
  GROQ = "groq"
905
+ OPENROUTER = "openrouter"
884
906
  OLLAMA = "ollama"
885
907
  LITELLM = "litellm"
886
908
  ZHIPU = "zhipuai"
@@ -931,6 +953,11 @@ class ModelPlatformType(Enum):
931
953
  r"""Returns whether this platform is groq."""
932
954
  return self is ModelPlatformType.GROQ
933
955
 
956
+ @property
957
+ def is_openrouter(self) -> bool:
958
+ r"""Returns whether this platform is openrouter."""
959
+ return self is ModelPlatformType.OPENROUTER
960
+
934
961
  @property
935
962
  def is_ollama(self) -> bool:
936
963
  r"""Returns whether this platform is ollama."""
@@ -78,6 +78,11 @@ class UnifiedModelType(str):
78
78
  r"""Returns whether the model is a Groq served model."""
79
79
  return True
80
80
 
81
+ @property
82
+ def is_openrouter(self) -> bool:
83
+ r"""Returns whether the model is a OpenRouter served model."""
84
+ return True
85
+
81
86
  @property
82
87
  def is_zhipuai(self) -> bool:
83
88
  r"""Returns whether the model is a Zhipuai model."""
@@ -16,9 +16,7 @@ from typing import List, Optional
16
16
 
17
17
  from unstructured.documents.elements import Element, ElementMetadata
18
18
 
19
- from camel.messages import OpenAIUserMessage
20
- from camel.types import ModelType
21
- from camel.utils import BaseTokenCounter, OpenAITokenCounter
19
+ from camel.utils import get_model_encoding
22
20
 
23
21
  from .base import BaseChunker
24
22
 
@@ -38,20 +36,18 @@ class CodeChunker(BaseChunker):
38
36
  token counting, if `None`, OpenAITokenCounter will be used.
39
37
  (default: :obj:`None`)
40
38
  remove_image: (bool, optional): If the chunker should skip the images.
39
+ model_name (str, optional): The tokenizer model name used
40
+ for token counting. (default: :obj:`"cl100k_base"`)
41
41
  """
42
42
 
43
43
  def __init__(
44
44
  self,
45
45
  chunk_size: int = 8192,
46
- token_counter: Optional[BaseTokenCounter] = None,
46
+ model_name: str = "cl100k_base",
47
47
  remove_image: Optional[bool] = True,
48
48
  ):
49
49
  self.chunk_size = chunk_size
50
- self.token_counter = (
51
- token_counter
52
- if token_counter
53
- else OpenAITokenCounter(model=ModelType.GPT_4O_MINI)
54
- )
50
+ self.tokenizer = get_model_encoding(model_name)
55
51
  self.remove_image = remove_image
56
52
  self.struct_pattern = re.compile(
57
53
  r'^\s*(?:(def|class|function)\s+\w+|'
@@ -72,9 +68,7 @@ class CodeChunker(BaseChunker):
72
68
  Returns:
73
69
  int: The number of tokens in the input text.
74
70
  """
75
- return self.token_counter.count_tokens_from_messages(
76
- [OpenAIUserMessage(role="user", name="user", content=text)]
77
- )
71
+ return len(self.tokenizer.encode(text, disallowed_special=()))
78
72
 
79
73
  def _split_oversized(self, line: str) -> List[str]:
80
74
  r"""Splits an oversized line into multiple chunks based on token limits
@@ -86,7 +80,7 @@ class CodeChunker(BaseChunker):
86
80
  List[str]: A list of smaller chunks after splitting the
87
81
  oversized line.
88
82
  """
89
- tokens = self.token_counter.encode(line)
83
+ tokens = self.tokenizer.encode(line, disallowed_special=())
90
84
  chunks = []
91
85
  buffer = []
92
86
  current_count = 0
@@ -96,12 +90,12 @@ class CodeChunker(BaseChunker):
96
90
  current_count += 1
97
91
 
98
92
  if current_count >= self.chunk_size:
99
- chunks.append(self.token_counter.decode(buffer).strip())
93
+ chunks.append(self.tokenizer.decode(buffer).strip())
100
94
  buffer = []
101
95
  current_count = 0
102
96
 
103
97
  if buffer:
104
- chunks.append(self.token_counter.decode(buffer))
98
+ chunks.append(self.tokenizer.decode(buffer))
105
99
  return chunks
106
100
 
107
101
  def chunk(self, content: List[str]) -> List[Element]:
@@ -12,12 +12,11 @@
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
  from .base import BaseVerifier
15
- from .models import VerificationOutcome, VerifierInput
15
+ from .models import VerificationOutcome
16
16
  from .python_verifier import PythonVerifier
17
17
 
18
18
  __all__ = [
19
19
  "BaseVerifier",
20
20
  "VerificationOutcome",
21
- "VerifierInput",
22
21
  "PythonVerifier",
23
22
  ]
camel/verifiers/base.py CHANGED
@@ -16,14 +16,11 @@ import time
16
16
  from abc import ABC, abstractmethod
17
17
  from typing import List, Optional
18
18
 
19
+ from camel.extractors.base import BaseExtractor
19
20
  from camel.logger import get_logger
20
21
  from camel.utils import BatchProcessor
21
22
 
22
- from .models import (
23
- VerificationOutcome,
24
- VerificationResult,
25
- VerifierInput,
26
- )
23
+ from .models import VerificationOutcome, VerificationResult
27
24
 
28
25
  logger = get_logger(__name__)
29
26
 
@@ -48,6 +45,7 @@ class BaseVerifier(ABC):
48
45
 
49
46
  def __init__(
50
47
  self,
48
+ extractor: Optional[BaseExtractor] = None,
51
49
  max_parallel: Optional[int] = None,
52
50
  timeout: Optional[float] = None,
53
51
  max_retries: int = 3,
@@ -76,6 +74,9 @@ class BaseVerifier(ABC):
76
74
  down. (default: :obj:`85.0`)
77
75
  **kwargs: Additional verifier parameters.
78
76
  """
77
+
78
+ self.extractor = extractor
79
+
79
80
  self._is_setup: bool = False
80
81
  self._max_parallel: Optional[int] = max_parallel
81
82
  self._timeout: Optional[float] = timeout
@@ -86,7 +87,7 @@ class BaseVerifier(ABC):
86
87
  self._memory_threshold: float = memory_threshold
87
88
  self._batch_processor: BatchProcessor = BatchProcessor()
88
89
 
89
- async def setup(self) -> None:
90
+ async def setup(self, **kwargs) -> None:
90
91
  r"""Set up the verifier with necessary resources.
91
92
 
92
93
  Initializes:
@@ -101,6 +102,8 @@ class BaseVerifier(ABC):
101
102
  return
102
103
 
103
104
  try:
105
+ if self.extractor:
106
+ await self.extractor.setup()
104
107
  batch_size = max(1, self._initial_batch_size or 10)
105
108
  max_parallel = max(1, self._max_parallel or 1)
106
109
  self._batch_processor = BatchProcessor()
@@ -110,7 +113,7 @@ class BaseVerifier(ABC):
110
113
  f"batch_size={batch_size}, max_parallel={max_parallel}"
111
114
  )
112
115
 
113
- await self._setup()
116
+ await self._setup(**kwargs)
114
117
  self._is_setup = True
115
118
 
116
119
  except Exception as e:
@@ -122,7 +125,7 @@ class BaseVerifier(ABC):
122
125
  raise RuntimeError(error_msg) from e
123
126
 
124
127
  @abstractmethod
125
- async def _setup(self) -> None:
128
+ async def _setup(self, **kwargs) -> None:
126
129
  r"""Implement verifier-specific setup logic."""
127
130
  pass
128
131
 
@@ -140,6 +143,8 @@ class BaseVerifier(ABC):
140
143
  return
141
144
 
142
145
  try:
146
+ if self.extractor:
147
+ await self.extractor.cleanup()
143
148
  self._batch_processor = BatchProcessor()
144
149
  await self._cleanup()
145
150
  logger.info(f"{self.__class__.__name__} cleaned up successfully")
@@ -157,26 +162,33 @@ class BaseVerifier(ABC):
157
162
  r"""Implement verifier-specific cleanup logic."""
158
163
  pass
159
164
 
160
- async def verify(self, result: VerifierInput) -> VerificationResult:
165
+ async def verify(
166
+ self, solution: str, ground_truth: Optional[str]
167
+ ) -> VerificationResult:
161
168
  r"""Perform verification with full error handling.
162
169
 
163
- Verifies correctness, expected output, reasoning, and symbolic
164
- consistency.
170
+ This method verifies the correctness of a generated solution by
171
+ comparing it against the provided ground truth. It handles
172
+ execution errors, timeouts, and retry attempts to ensure robust
173
+ validation.
165
174
 
166
175
  Args:
167
- result: The response to verify.
176
+ solution (str): The generated response that needs verification.
177
+ ground_truth (Optional[str]): The expected correct answer to
178
+ compare against.
168
179
 
169
180
  Returns:
170
- VerificationResult: Structured result containing:
171
- - status: SUCCESS/FAILURE/ERROR/TIMEOUT
172
- - result: Verification outcome description
173
- - duration: Time taken for verification
174
- - metadata: Additional details
175
- - error_message: Error description if applicable
181
+ VerificationResult: A structured object containing:
182
+ - status (SUCCESS/FAILURE/ERROR/TIMEOUT)
183
+ - result (str): The verification outcome or processed output.
184
+ - duration (float): Time taken for verification.
185
+ - metadata (dict): Additional details such as retry attempts.
186
+ - error_message (Optional[str]): Error description,
187
+ if applicable.
176
188
 
177
189
  Raises:
178
190
  RuntimeError: If verification fails unexpectedly.
179
- asyncio.TimeoutError: If verification times out.
191
+ asyncio.TimeoutError: If verification exceeds the time limit.
180
192
  """
181
193
  if not self._is_setup:
182
194
  logger.warning(
@@ -188,14 +200,29 @@ class BaseVerifier(ABC):
188
200
  start_time = time.time()
189
201
 
190
202
  while attempt < self._max_retries:
203
+ # Extract verifiable part of the proposed solution,
204
+ # if verifier has been initialized with extractor.
205
+ verifiable_solution = (
206
+ await self.extractor.extract(solution)
207
+ if self.extractor
208
+ else solution
209
+ )
210
+
211
+ if not verifiable_solution:
212
+ continue
213
+
191
214
  try:
192
215
  verification_result = (
193
216
  await asyncio.wait_for(
194
- self._verify_implementation(result),
217
+ self._verify_implementation(
218
+ verifiable_solution, ground_truth
219
+ ),
195
220
  timeout=self._timeout,
196
221
  )
197
222
  if self._timeout
198
- else await self._verify_implementation(result)
223
+ else await self._verify_implementation(
224
+ verifiable_solution, ground_truth
225
+ )
199
226
  )
200
227
 
201
228
  verification_result.duration = time.time() - start_time
@@ -240,101 +267,134 @@ class BaseVerifier(ABC):
240
267
 
241
268
  @abstractmethod
242
269
  async def _verify_implementation(
243
- self, result: VerifierInput
270
+ self, solution: str, ground_truth: Optional[str]
244
271
  ) -> VerificationResult:
245
- r"""Implement the actual verification logic.
272
+ r"""Abstract method for verification logic.
273
+
274
+ Subclasses must implement this method to define how the solution
275
+ should be processed, evaluated, and compared to the ground truth.
246
276
 
247
277
  Args:
248
- result: The response to verify.
278
+ solution (str): The generated response requiring verification.
279
+ ground_truth (Optional[str]): The expected reference output.
249
280
 
250
281
  Returns:
251
- VerificationResult: Containing the verification outcome.
282
+ VerificationResult: Contains verification status and details.
252
283
 
253
284
  Raises:
254
- NotImplementedError: Must be implemented in subclasses.
285
+ NotImplementedError: If the method is not implemented
286
+ in a subclass.
255
287
  """
256
288
  raise NotImplementedError(
257
289
  "Subclasses must implement _verify_implementation()"
258
290
  )
259
291
 
292
+ # TODO: check again
293
+ async def verify_batch(
294
+ self,
295
+ solutions: List[str],
296
+ ground_truths: List[Optional[str]],
297
+ raise_on_error: bool = False,
298
+ ) -> List[VerificationResult]:
299
+ r"""Verify multiple solutions in parallel with controlled concurrency.
260
300
 
261
- async def verify_batch(
262
- self, results: List[VerifierInput], raise_on_error: bool = False
263
- ) -> List[VerificationResult]:
264
- r"""Verify multiple results in parallel with controlled concurrency.
301
+ This method verifies multiple generated solutions against their
302
+ respective ground truths using parallel execution. It handles
303
+ timeouts, execution errors, and batch processing optimizations.
265
304
 
266
- Args:
267
- results: List of responses to verify.
268
- raise_on_error: Whether to raise an exception if any verification
269
- fails. (default: :obj:`False`)
305
+ Args:
306
+ solutions (List[str]): A list of generated solutions to be
307
+ verified.
308
+ ground_truths (List[Optional[str]]): A list of expected outputs for
309
+ comparison. Each element corresponds to a solution.
310
+ raise_on_error (bool, optional): If True, raises an exception if
311
+ any verification fails. (default: :obj:`False`)
270
312
 
271
- Returns:
272
- List[VerificationResult]: One for each input response.
313
+ Returns:
314
+ List[VerificationResult]: A list of verification results, one per
315
+ input solution.
273
316
 
274
- Raises:
275
- RuntimeError: If any verification fails and raise_on_error is True.
276
- asyncio.TimeoutError: If verifications time out and max retries
277
- exceeded.
278
- """
279
- if not self._is_setup:
280
- logger.warning(
281
- f"{self.__class__.__name__} not set up, calling setup()"
282
- )
283
- await self.setup()
284
-
285
- # Get current batch parameters from processor with defaults if not
286
- # present
287
- max_workers = getattr(
288
- self._batch_processor, 'max_workers', self._max_parallel or 1
289
- )
290
- batch_size = getattr(
291
- self._batch_processor, 'batch_size', self._initial_batch_size or 10
292
- )
293
- semaphore = asyncio.Semaphore(max(1, max_workers))
294
-
295
- async def _verify_with_semaphore(
296
- response: VerifierInput,
297
- ) -> VerificationResult:
298
- start_time = time.time()
299
- try:
300
- async with semaphore:
301
- verification_result = await self.verify(response)
302
- processing_time = time.time() - start_time
303
- success = verification_result.status == VerificationOutcome.SUCCESS
304
- self._batch_processor.adjust_batch_size(success, processing_time)
305
- return verification_result
306
- except Exception as e:
307
- processing_time = time.time() - start_time
308
- self._batch_processor.adjust_batch_size(False, processing_time)
309
- logger.error(f"Verification failed: {e!s}", exc_info=True)
310
- return VerificationResult(
311
- status=VerificationOutcome.ERROR,
312
- result="",
313
- error_message=str(e),
314
- metadata={"error_type": type(e).__name__},
317
+ Raises:
318
+ RuntimeError: If any verification fails and `raise_on_error` is
319
+ True.
320
+ asyncio.TimeoutError: If verifications time out after maximum
321
+ retries.
322
+ """
323
+
324
+ if not self._is_setup:
325
+ logger.warning(
326
+ f"{self.__class__.__name__} not set up, calling setup()"
315
327
  )
328
+ await self.setup()
316
329
 
317
- # Process in batches
318
- all_results: List[VerificationResult] = []
319
- for i in range(0, len(results), batch_size):
320
- batch = results[i : i + batch_size]
321
- verification_tasks = [
322
- _verify_with_semaphore(result) for result in batch
323
- ]
324
- try:
325
- batch_results = await asyncio.gather(*verification_tasks)
326
- all_results.extend(batch_results)
327
- except Exception as e:
328
- logger.error(f"Batch verification failed: {e!s}", exc_info=True)
329
- if raise_on_error:
330
- raise RuntimeError(f"Batch verification failed: {e!s}") from e
330
+ # Retrieve batch processing settings
331
+ max_workers = getattr(
332
+ self._batch_processor, 'max_workers', self._max_parallel or 1
333
+ )
334
+ batch_size = getattr(
335
+ self._batch_processor, 'batch_size', self._initial_batch_size or 10
336
+ )
337
+ semaphore = asyncio.Semaphore(max(1, max_workers))
331
338
 
332
- if raise_on_error and any(
333
- r.status in {VerificationOutcome.ERROR, VerificationOutcome.TIMEOUT}
334
- for r in all_results
335
- ):
336
- error_msg = "One or more verifications failed"
337
- logger.error(error_msg)
338
- raise RuntimeError(error_msg)
339
+ async def _verify_with_semaphore(
340
+ solution: str, ground_truth: Optional[str]
341
+ ) -> VerificationResult:
342
+ start_time = time.time()
343
+ try:
344
+ async with semaphore:
345
+ verification_result = await self.verify(
346
+ solution, ground_truth
347
+ )
348
+ processing_time = time.time() - start_time
349
+ success = (
350
+ verification_result.status == VerificationOutcome.SUCCESS
351
+ )
352
+ self._batch_processor.adjust_batch_size(
353
+ success, processing_time
354
+ )
355
+ return verification_result
356
+ except Exception as e:
357
+ processing_time = time.time() - start_time
358
+ self._batch_processor.adjust_batch_size(False, processing_time)
359
+ logger.error(f"Verification failed: {e!s}", exc_info=True)
360
+ return VerificationResult(
361
+ status=VerificationOutcome.ERROR,
362
+ result="",
363
+ error_message=str(e),
364
+ metadata={"error_type": type(e).__name__},
365
+ )
339
366
 
340
- return all_results
367
+ # Process in batches
368
+ all_results: List[VerificationResult] = []
369
+ for i in range(0, len(solutions), batch_size):
370
+ batch_solutions = solutions[i : i + batch_size]
371
+ batch_ground_truths = ground_truths[i : i + batch_size]
372
+
373
+ verification_tasks = [
374
+ _verify_with_semaphore(solution, ground_truth)
375
+ for solution, ground_truth in zip(
376
+ batch_solutions, batch_ground_truths
377
+ )
378
+ ]
379
+ try:
380
+ batch_results = await asyncio.gather(*verification_tasks)
381
+ all_results.extend(batch_results)
382
+ except Exception as e:
383
+ logger.error(
384
+ f"Batch verification failed: {e!s}", exc_info=True
385
+ )
386
+ if raise_on_error:
387
+ raise RuntimeError(
388
+ f"Batch verification failed: {e!s}"
389
+ ) from e
390
+
391
+ if raise_on_error and any(
392
+ r.status
393
+ in {VerificationOutcome.ERROR, VerificationOutcome.TIMEOUT}
394
+ for r in all_results
395
+ ):
396
+ error_msg = "One or more verifications failed"
397
+ logger.error(error_msg)
398
+ raise RuntimeError(error_msg)
399
+
400
+ return all_results
camel/verifiers/models.py CHANGED
@@ -18,18 +18,6 @@ from typing import Any, Dict, Optional
18
18
  from pydantic import BaseModel, Field
19
19
 
20
20
 
21
- class VerifierInput(BaseModel):
22
- r"""Structured input to the verifier"""
23
-
24
- llm_response: str = Field(
25
- description="The LLM response to be verified."
26
- "Needs to be in a format that the verifier can handle."
27
- )
28
- ground_truth: Optional[str] = Field(
29
- None, description="The ground truth data, if available."
30
- )
31
-
32
-
33
21
  class VerificationOutcome(Enum):
34
22
  r"""Enum representing the status of a verification."""
35
23