camel-ai 0.2.35__py3-none-any.whl → 0.2.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/__init__.py +2 -0
- camel/agents/repo_agent.py +579 -0
- camel/configs/__init__.py +3 -0
- camel/configs/aiml_config.py +20 -19
- camel/configs/anthropic_config.py +25 -27
- camel/configs/cohere_config.py +11 -10
- camel/configs/deepseek_config.py +16 -16
- camel/configs/gemini_config.py +8 -8
- camel/configs/groq_config.py +18 -19
- camel/configs/internlm_config.py +8 -8
- camel/configs/litellm_config.py +26 -24
- camel/configs/mistral_config.py +8 -8
- camel/configs/moonshot_config.py +11 -11
- camel/configs/nvidia_config.py +13 -13
- camel/configs/ollama_config.py +14 -15
- camel/configs/openai_config.py +3 -3
- camel/configs/openrouter_config.py +106 -0
- camel/configs/qwen_config.py +8 -8
- camel/configs/reka_config.py +12 -11
- camel/configs/samba_config.py +14 -14
- camel/configs/sglang_config.py +15 -16
- camel/configs/siliconflow_config.py +18 -17
- camel/configs/togetherai_config.py +18 -19
- camel/configs/vllm_config.py +18 -19
- camel/configs/yi_config.py +7 -8
- camel/configs/zhipuai_config.py +8 -9
- camel/datasets/few_shot_generator.py +2 -5
- camel/datasets/static_dataset.py +25 -23
- camel/environments/models.py +3 -0
- camel/environments/single_step.py +212 -132
- camel/extractors/__init__.py +16 -1
- camel/memories/agent_memories.py +2 -1
- camel/memories/blocks/chat_history_block.py +2 -1
- camel/models/__init__.py +2 -0
- camel/models/gemini_model.py +36 -0
- camel/models/groq_model.py +6 -3
- camel/models/model_factory.py +3 -0
- camel/models/openrouter_model.py +204 -0
- camel/storages/__init__.py +2 -0
- camel/storages/key_value_storages/__init__.py +2 -0
- camel/storages/key_value_storages/mem0_cloud.py +224 -0
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/toolkits/__init__.py +2 -0
- camel/toolkits/browser_toolkit.py +43 -0
- camel/toolkits/code_execution.py +2 -1
- camel/toolkits/mcp_toolkit.py +30 -1
- camel/toolkits/thinking_toolkit.py +74 -0
- camel/types/enums.py +27 -0
- camel/types/unified_model_type.py +5 -0
- camel/utils/chunker/code_chunker.py +9 -15
- camel/verifiers/__init__.py +1 -2
- camel/verifiers/base.py +159 -99
- camel/verifiers/models.py +0 -12
- camel/verifiers/python_verifier.py +316 -60
- {camel_ai-0.2.35.dist-info → camel_ai-0.2.37.dist-info}/METADATA +54 -5
- {camel_ai-0.2.35.dist-info → camel_ai-0.2.37.dist-info}/RECORD +59 -54
- {camel_ai-0.2.35.dist-info → camel_ai-0.2.37.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.35.dist-info → camel_ai-0.2.37.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
from typing import List, Optional
|
|
16
|
+
|
|
17
|
+
from camel.logger import get_logger
|
|
18
|
+
from camel.toolkits import FunctionTool
|
|
19
|
+
from camel.toolkits.base import BaseToolkit
|
|
20
|
+
|
|
21
|
+
logger = get_logger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ThinkingToolkit(BaseToolkit):
|
|
25
|
+
r"""A toolkit for recording thoughts during reasoning processes.
|
|
26
|
+
|
|
27
|
+
Attributes:
|
|
28
|
+
thoughts (List[str]): A list to store the recorded thoughts.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
def __init__(
|
|
32
|
+
self,
|
|
33
|
+
timeout: Optional[float] = None,
|
|
34
|
+
):
|
|
35
|
+
r"""Initialize the ThinkingToolkit.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
timeout (Optional[float]): The timeout for the toolkit.
|
|
39
|
+
(default: :obj:`None`)
|
|
40
|
+
"""
|
|
41
|
+
super().__init__(timeout=timeout)
|
|
42
|
+
self.thoughts: List[str] = []
|
|
43
|
+
|
|
44
|
+
def think(self, thought: str) -> str:
|
|
45
|
+
r"""Use the tool to think about something.
|
|
46
|
+
It will not obtain new information or change the database, but just
|
|
47
|
+
append the thought to the log. Use it when complex reasoning or some
|
|
48
|
+
cache memory is needed.
|
|
49
|
+
|
|
50
|
+
Args:
|
|
51
|
+
thought (str): A thought to think about.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
str: The full log of thoughts including the new thought.
|
|
55
|
+
"""
|
|
56
|
+
try:
|
|
57
|
+
logger.debug(f"Thought: {thought}")
|
|
58
|
+
self.thoughts.append(thought)
|
|
59
|
+
|
|
60
|
+
thoughts = "\n".join([f"- {t}" for t in self.thoughts])
|
|
61
|
+
return f"Thoughts:\n{thoughts}"
|
|
62
|
+
|
|
63
|
+
except Exception as e:
|
|
64
|
+
error_msg = f"Error recording thought: {e}"
|
|
65
|
+
logger.error(error_msg)
|
|
66
|
+
return error_msg
|
|
67
|
+
|
|
68
|
+
def get_tools(self) -> List[FunctionTool]:
|
|
69
|
+
r"""Get all tools in the toolkit.
|
|
70
|
+
|
|
71
|
+
Returns:
|
|
72
|
+
List[FunctionTool]: A list of tools.
|
|
73
|
+
"""
|
|
74
|
+
return [FunctionTool(self.think)]
|
camel/types/enums.py
CHANGED
|
@@ -63,6 +63,11 @@ class ModelType(UnifiedModelType, Enum):
|
|
|
63
63
|
GROQ_MIXTRAL_8_7B = "mixtral-8x7b-32768"
|
|
64
64
|
GROQ_GEMMA_2_9B_IT = "gemma2-9b-it"
|
|
65
65
|
|
|
66
|
+
# OpenRouter models
|
|
67
|
+
OPENROUTER_LLAMA_3_1_405B = "meta-llama/llama-3.3-405b-instruct"
|
|
68
|
+
OPENROUTER_LLAMA_3_1_70B = "meta-llama/llama-3.3-70b-instruct"
|
|
69
|
+
OPENROUTER_OLYMPICODER_7B = "open-r1/olympiccoder-7b:free"
|
|
70
|
+
|
|
66
71
|
# TogetherAI platform models support tool calling
|
|
67
72
|
TOGETHER_LLAMA_3_1_8B = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
|
|
68
73
|
TOGETHER_LLAMA_3_1_70B = "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo"
|
|
@@ -118,6 +123,7 @@ class ModelType(UnifiedModelType, Enum):
|
|
|
118
123
|
NVIDIA_LLAMA3_3_70B_INSTRUCT = "meta/llama-3.3-70b-instruct"
|
|
119
124
|
|
|
120
125
|
# Gemini models
|
|
126
|
+
GEMINI_2_5_PRO_EXP = "gemini-2.5-pro-exp-03-25"
|
|
121
127
|
GEMINI_2_0_FLASH = "gemini-2.0-flash-exp"
|
|
122
128
|
GEMINI_2_0_FLASH_THINKING = "gemini-2.0-flash-thinking-exp"
|
|
123
129
|
GEMINI_2_0_PRO_EXP = "gemini-2.0-pro-exp-02-05"
|
|
@@ -253,6 +259,7 @@ class ModelType(UnifiedModelType, Enum):
|
|
|
253
259
|
self.is_together,
|
|
254
260
|
self.is_sambanova,
|
|
255
261
|
self.is_groq,
|
|
262
|
+
self.is_openrouter,
|
|
256
263
|
self.is_sglang,
|
|
257
264
|
self.is_moonshot,
|
|
258
265
|
self.is_siliconflow,
|
|
@@ -342,6 +349,15 @@ class ModelType(UnifiedModelType, Enum):
|
|
|
342
349
|
ModelType.GROQ_GEMMA_2_9B_IT,
|
|
343
350
|
}
|
|
344
351
|
|
|
352
|
+
@property
|
|
353
|
+
def is_openrouter(self) -> bool:
|
|
354
|
+
r"""Returns whether this type of models is served by OpenRouter."""
|
|
355
|
+
return self in {
|
|
356
|
+
ModelType.OPENROUTER_LLAMA_3_1_405B,
|
|
357
|
+
ModelType.OPENROUTER_LLAMA_3_1_70B,
|
|
358
|
+
ModelType.OPENROUTER_OLYMPICODER_7B,
|
|
359
|
+
}
|
|
360
|
+
|
|
345
361
|
@property
|
|
346
362
|
def is_together(self) -> bool:
|
|
347
363
|
r"""Returns whether this type of models is served by Together AI."""
|
|
@@ -405,6 +421,7 @@ class ModelType(UnifiedModelType, Enum):
|
|
|
405
421
|
bool: Whether this type of models is gemini.
|
|
406
422
|
"""
|
|
407
423
|
return self in {
|
|
424
|
+
ModelType.GEMINI_2_5_PRO_EXP,
|
|
408
425
|
ModelType.GEMINI_2_0_FLASH,
|
|
409
426
|
ModelType.GEMINI_1_5_FLASH,
|
|
410
427
|
ModelType.GEMINI_1_5_PRO,
|
|
@@ -580,6 +597,7 @@ class ModelType(UnifiedModelType, Enum):
|
|
|
580
597
|
ModelType.MOONSHOT_V1_8K,
|
|
581
598
|
ModelType.GLM_4V_FLASH,
|
|
582
599
|
ModelType.GLM_4_AIRX,
|
|
600
|
+
ModelType.OPENROUTER_OLYMPICODER_7B,
|
|
583
601
|
}:
|
|
584
602
|
return 8_192
|
|
585
603
|
elif self in {
|
|
@@ -686,6 +704,8 @@ class ModelType(UnifiedModelType, Enum):
|
|
|
686
704
|
ModelType.SGLANG_QWEN_2_5_7B,
|
|
687
705
|
ModelType.SGLANG_QWEN_2_5_32B,
|
|
688
706
|
ModelType.SGLANG_QWEN_2_5_72B,
|
|
707
|
+
ModelType.OPENROUTER_LLAMA_3_1_70B,
|
|
708
|
+
ModelType.OPENROUTER_LLAMA_3_1_405B,
|
|
689
709
|
}:
|
|
690
710
|
return 131_072
|
|
691
711
|
elif self in {
|
|
@@ -706,6 +726,7 @@ class ModelType(UnifiedModelType, Enum):
|
|
|
706
726
|
}:
|
|
707
727
|
return 256_000
|
|
708
728
|
elif self in {
|
|
729
|
+
ModelType.GEMINI_2_5_PRO_EXP,
|
|
709
730
|
ModelType.GEMINI_2_0_FLASH,
|
|
710
731
|
ModelType.GEMINI_1_5_FLASH,
|
|
711
732
|
ModelType.GEMINI_1_5_PRO,
|
|
@@ -881,6 +902,7 @@ class ModelPlatformType(Enum):
|
|
|
881
902
|
AZURE = "azure"
|
|
882
903
|
ANTHROPIC = "anthropic"
|
|
883
904
|
GROQ = "groq"
|
|
905
|
+
OPENROUTER = "openrouter"
|
|
884
906
|
OLLAMA = "ollama"
|
|
885
907
|
LITELLM = "litellm"
|
|
886
908
|
ZHIPU = "zhipuai"
|
|
@@ -931,6 +953,11 @@ class ModelPlatformType(Enum):
|
|
|
931
953
|
r"""Returns whether this platform is groq."""
|
|
932
954
|
return self is ModelPlatformType.GROQ
|
|
933
955
|
|
|
956
|
+
@property
|
|
957
|
+
def is_openrouter(self) -> bool:
|
|
958
|
+
r"""Returns whether this platform is openrouter."""
|
|
959
|
+
return self is ModelPlatformType.OPENROUTER
|
|
960
|
+
|
|
934
961
|
@property
|
|
935
962
|
def is_ollama(self) -> bool:
|
|
936
963
|
r"""Returns whether this platform is ollama."""
|
|
@@ -78,6 +78,11 @@ class UnifiedModelType(str):
|
|
|
78
78
|
r"""Returns whether the model is a Groq served model."""
|
|
79
79
|
return True
|
|
80
80
|
|
|
81
|
+
@property
|
|
82
|
+
def is_openrouter(self) -> bool:
|
|
83
|
+
r"""Returns whether the model is an OpenRouter served model."""
|
|
84
|
+
return True
|
|
85
|
+
|
|
81
86
|
@property
|
|
82
87
|
def is_zhipuai(self) -> bool:
|
|
83
88
|
r"""Returns whether the model is a Zhipuai model."""
|
|
@@ -16,9 +16,7 @@ from typing import List, Optional
|
|
|
16
16
|
|
|
17
17
|
from unstructured.documents.elements import Element, ElementMetadata
|
|
18
18
|
|
|
19
|
-
from camel.
|
|
20
|
-
from camel.types import ModelType
|
|
21
|
-
from camel.utils import BaseTokenCounter, OpenAITokenCounter
|
|
19
|
+
from camel.utils import get_model_encoding
|
|
22
20
|
|
|
23
21
|
from .base import BaseChunker
|
|
24
22
|
|
|
@@ -38,20 +36,18 @@ class CodeChunker(BaseChunker):
|
|
|
38
36
|
token counting, if `None`, OpenAITokenCounter will be used.
|
|
39
37
|
(default: :obj:`None`)
|
|
40
38
|
remove_image: (bool, optional): If the chunker should skip the images.
|
|
39
|
+
model_name (str, optional): The tokenizer model name used
|
|
40
|
+
for token counting. (default: :obj:`"cl100k_base"`)
|
|
41
41
|
"""
|
|
42
42
|
|
|
43
43
|
def __init__(
|
|
44
44
|
self,
|
|
45
45
|
chunk_size: int = 8192,
|
|
46
|
-
|
|
46
|
+
model_name: str = "cl100k_base",
|
|
47
47
|
remove_image: Optional[bool] = True,
|
|
48
48
|
):
|
|
49
49
|
self.chunk_size = chunk_size
|
|
50
|
-
self.
|
|
51
|
-
token_counter
|
|
52
|
-
if token_counter
|
|
53
|
-
else OpenAITokenCounter(model=ModelType.GPT_4O_MINI)
|
|
54
|
-
)
|
|
50
|
+
self.tokenizer = get_model_encoding(model_name)
|
|
55
51
|
self.remove_image = remove_image
|
|
56
52
|
self.struct_pattern = re.compile(
|
|
57
53
|
r'^\s*(?:(def|class|function)\s+\w+|'
|
|
@@ -72,9 +68,7 @@ class CodeChunker(BaseChunker):
|
|
|
72
68
|
Returns:
|
|
73
69
|
int: The number of tokens in the input text.
|
|
74
70
|
"""
|
|
75
|
-
return self.
|
|
76
|
-
[OpenAIUserMessage(role="user", name="user", content=text)]
|
|
77
|
-
)
|
|
71
|
+
return len(self.tokenizer.encode(text, disallowed_special=()))
|
|
78
72
|
|
|
79
73
|
def _split_oversized(self, line: str) -> List[str]:
|
|
80
74
|
r"""Splits an oversized line into multiple chunks based on token limits
|
|
@@ -86,7 +80,7 @@ class CodeChunker(BaseChunker):
|
|
|
86
80
|
List[str]: A list of smaller chunks after splitting the
|
|
87
81
|
oversized line.
|
|
88
82
|
"""
|
|
89
|
-
tokens = self.
|
|
83
|
+
tokens = self.tokenizer.encode(line, disallowed_special=())
|
|
90
84
|
chunks = []
|
|
91
85
|
buffer = []
|
|
92
86
|
current_count = 0
|
|
@@ -96,12 +90,12 @@ class CodeChunker(BaseChunker):
|
|
|
96
90
|
current_count += 1
|
|
97
91
|
|
|
98
92
|
if current_count >= self.chunk_size:
|
|
99
|
-
chunks.append(self.
|
|
93
|
+
chunks.append(self.tokenizer.decode(buffer).strip())
|
|
100
94
|
buffer = []
|
|
101
95
|
current_count = 0
|
|
102
96
|
|
|
103
97
|
if buffer:
|
|
104
|
-
chunks.append(self.
|
|
98
|
+
chunks.append(self.tokenizer.decode(buffer))
|
|
105
99
|
return chunks
|
|
106
100
|
|
|
107
101
|
def chunk(self, content: List[str]) -> List[Element]:
|
camel/verifiers/__init__.py
CHANGED
|
@@ -12,12 +12,11 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
from .base import BaseVerifier
|
|
15
|
-
from .models import VerificationOutcome
|
|
15
|
+
from .models import VerificationOutcome
|
|
16
16
|
from .python_verifier import PythonVerifier
|
|
17
17
|
|
|
18
18
|
__all__ = [
|
|
19
19
|
"BaseVerifier",
|
|
20
20
|
"VerificationOutcome",
|
|
21
|
-
"VerifierInput",
|
|
22
21
|
"PythonVerifier",
|
|
23
22
|
]
|
camel/verifiers/base.py
CHANGED
|
@@ -16,14 +16,11 @@ import time
|
|
|
16
16
|
from abc import ABC, abstractmethod
|
|
17
17
|
from typing import List, Optional
|
|
18
18
|
|
|
19
|
+
from camel.extractors.base import BaseExtractor
|
|
19
20
|
from camel.logger import get_logger
|
|
20
21
|
from camel.utils import BatchProcessor
|
|
21
22
|
|
|
22
|
-
from .models import
|
|
23
|
-
VerificationOutcome,
|
|
24
|
-
VerificationResult,
|
|
25
|
-
VerifierInput,
|
|
26
|
-
)
|
|
23
|
+
from .models import VerificationOutcome, VerificationResult
|
|
27
24
|
|
|
28
25
|
logger = get_logger(__name__)
|
|
29
26
|
|
|
@@ -48,6 +45,7 @@ class BaseVerifier(ABC):
|
|
|
48
45
|
|
|
49
46
|
def __init__(
|
|
50
47
|
self,
|
|
48
|
+
extractor: Optional[BaseExtractor] = None,
|
|
51
49
|
max_parallel: Optional[int] = None,
|
|
52
50
|
timeout: Optional[float] = None,
|
|
53
51
|
max_retries: int = 3,
|
|
@@ -76,6 +74,9 @@ class BaseVerifier(ABC):
|
|
|
76
74
|
down. (default: :obj:`85.0`)
|
|
77
75
|
**kwargs: Additional verifier parameters.
|
|
78
76
|
"""
|
|
77
|
+
|
|
78
|
+
self.extractor = extractor
|
|
79
|
+
|
|
79
80
|
self._is_setup: bool = False
|
|
80
81
|
self._max_parallel: Optional[int] = max_parallel
|
|
81
82
|
self._timeout: Optional[float] = timeout
|
|
@@ -86,7 +87,7 @@ class BaseVerifier(ABC):
|
|
|
86
87
|
self._memory_threshold: float = memory_threshold
|
|
87
88
|
self._batch_processor: BatchProcessor = BatchProcessor()
|
|
88
89
|
|
|
89
|
-
async def setup(self) -> None:
|
|
90
|
+
async def setup(self, **kwargs) -> None:
|
|
90
91
|
r"""Set up the verifier with necessary resources.
|
|
91
92
|
|
|
92
93
|
Initializes:
|
|
@@ -101,6 +102,8 @@ class BaseVerifier(ABC):
|
|
|
101
102
|
return
|
|
102
103
|
|
|
103
104
|
try:
|
|
105
|
+
if self.extractor:
|
|
106
|
+
await self.extractor.setup()
|
|
104
107
|
batch_size = max(1, self._initial_batch_size or 10)
|
|
105
108
|
max_parallel = max(1, self._max_parallel or 1)
|
|
106
109
|
self._batch_processor = BatchProcessor()
|
|
@@ -110,7 +113,7 @@ class BaseVerifier(ABC):
|
|
|
110
113
|
f"batch_size={batch_size}, max_parallel={max_parallel}"
|
|
111
114
|
)
|
|
112
115
|
|
|
113
|
-
await self._setup()
|
|
116
|
+
await self._setup(**kwargs)
|
|
114
117
|
self._is_setup = True
|
|
115
118
|
|
|
116
119
|
except Exception as e:
|
|
@@ -122,7 +125,7 @@ class BaseVerifier(ABC):
|
|
|
122
125
|
raise RuntimeError(error_msg) from e
|
|
123
126
|
|
|
124
127
|
@abstractmethod
|
|
125
|
-
async def _setup(self) -> None:
|
|
128
|
+
async def _setup(self, **kwargs) -> None:
|
|
126
129
|
r"""Implement verifier-specific setup logic."""
|
|
127
130
|
pass
|
|
128
131
|
|
|
@@ -140,6 +143,8 @@ class BaseVerifier(ABC):
|
|
|
140
143
|
return
|
|
141
144
|
|
|
142
145
|
try:
|
|
146
|
+
if self.extractor:
|
|
147
|
+
await self.extractor.cleanup()
|
|
143
148
|
self._batch_processor = BatchProcessor()
|
|
144
149
|
await self._cleanup()
|
|
145
150
|
logger.info(f"{self.__class__.__name__} cleaned up successfully")
|
|
@@ -157,26 +162,33 @@ class BaseVerifier(ABC):
|
|
|
157
162
|
r"""Implement verifier-specific cleanup logic."""
|
|
158
163
|
pass
|
|
159
164
|
|
|
160
|
-
async def verify(
|
|
165
|
+
async def verify(
|
|
166
|
+
self, solution: str, ground_truth: Optional[str]
|
|
167
|
+
) -> VerificationResult:
|
|
161
168
|
r"""Perform verification with full error handling.
|
|
162
169
|
|
|
163
|
-
|
|
164
|
-
|
|
170
|
+
This method verifies the correctness of a generated solution by
|
|
171
|
+
comparing it against the provided ground truth. It handles
|
|
172
|
+
execution errors, timeouts, and retry attempts to ensure robust
|
|
173
|
+
validation.
|
|
165
174
|
|
|
166
175
|
Args:
|
|
167
|
-
|
|
176
|
+
solution (str): The generated response that needs verification.
|
|
177
|
+
ground_truth (Optional[str]): The expected correct answer to
|
|
178
|
+
compare against.
|
|
168
179
|
|
|
169
180
|
Returns:
|
|
170
|
-
VerificationResult:
|
|
171
|
-
- status
|
|
172
|
-
- result:
|
|
173
|
-
- duration: Time taken for verification
|
|
174
|
-
- metadata: Additional details
|
|
175
|
-
- error_message: Error description
|
|
181
|
+
VerificationResult: A structured object containing:
|
|
182
|
+
- status (SUCCESS/FAILURE/ERROR/TIMEOUT)
|
|
183
|
+
- result (str): The verification outcome or processed output.
|
|
184
|
+
- duration (float): Time taken for verification.
|
|
185
|
+
- metadata (dict): Additional details such as retry attempts.
|
|
186
|
+
- error_message (Optional[str]): Error description,
|
|
187
|
+
if applicable.
|
|
176
188
|
|
|
177
189
|
Raises:
|
|
178
190
|
RuntimeError: If verification fails unexpectedly.
|
|
179
|
-
asyncio.TimeoutError: If verification
|
|
191
|
+
asyncio.TimeoutError: If verification exceeds the time limit.
|
|
180
192
|
"""
|
|
181
193
|
if not self._is_setup:
|
|
182
194
|
logger.warning(
|
|
@@ -188,14 +200,29 @@ class BaseVerifier(ABC):
|
|
|
188
200
|
start_time = time.time()
|
|
189
201
|
|
|
190
202
|
while attempt < self._max_retries:
|
|
203
|
+
# Extract verifiable part of the proposed solution,
|
|
204
|
+
# if verifier has been initialized with extractor.
|
|
205
|
+
verifiable_solution = (
|
|
206
|
+
await self.extractor.extract(solution)
|
|
207
|
+
if self.extractor
|
|
208
|
+
else solution
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
if not verifiable_solution:
|
|
212
|
+
continue
|
|
213
|
+
|
|
191
214
|
try:
|
|
192
215
|
verification_result = (
|
|
193
216
|
await asyncio.wait_for(
|
|
194
|
-
self._verify_implementation(
|
|
217
|
+
self._verify_implementation(
|
|
218
|
+
verifiable_solution, ground_truth
|
|
219
|
+
),
|
|
195
220
|
timeout=self._timeout,
|
|
196
221
|
)
|
|
197
222
|
if self._timeout
|
|
198
|
-
else await self._verify_implementation(
|
|
223
|
+
else await self._verify_implementation(
|
|
224
|
+
verifiable_solution, ground_truth
|
|
225
|
+
)
|
|
199
226
|
)
|
|
200
227
|
|
|
201
228
|
verification_result.duration = time.time() - start_time
|
|
@@ -240,101 +267,134 @@ class BaseVerifier(ABC):
|
|
|
240
267
|
|
|
241
268
|
@abstractmethod
|
|
242
269
|
async def _verify_implementation(
|
|
243
|
-
self,
|
|
270
|
+
self, solution: str, ground_truth: Optional[str]
|
|
244
271
|
) -> VerificationResult:
|
|
245
|
-
r"""
|
|
272
|
+
r"""Abstract method for verification logic.
|
|
273
|
+
|
|
274
|
+
Subclasses must implement this method to define how the solution
|
|
275
|
+
should be processed, evaluated, and compared to the ground truth.
|
|
246
276
|
|
|
247
277
|
Args:
|
|
248
|
-
|
|
278
|
+
solution (str): The generated response requiring verification.
|
|
279
|
+
ground_truth (Optional[str]): The expected reference output.
|
|
249
280
|
|
|
250
281
|
Returns:
|
|
251
|
-
VerificationResult:
|
|
282
|
+
VerificationResult: Contains verification status and details.
|
|
252
283
|
|
|
253
284
|
Raises:
|
|
254
|
-
NotImplementedError:
|
|
285
|
+
NotImplementedError: If the method is not implemented
|
|
286
|
+
in a subclass.
|
|
255
287
|
"""
|
|
256
288
|
raise NotImplementedError(
|
|
257
289
|
"Subclasses must implement _verify_implementation()"
|
|
258
290
|
)
|
|
259
291
|
|
|
292
|
+
# TODO: check again
|
|
293
|
+
async def verify_batch(
|
|
294
|
+
self,
|
|
295
|
+
solutions: List[str],
|
|
296
|
+
ground_truths: List[Optional[str]],
|
|
297
|
+
raise_on_error: bool = False,
|
|
298
|
+
) -> List[VerificationResult]:
|
|
299
|
+
r"""Verify multiple solutions in parallel with controlled concurrency.
|
|
260
300
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
r"""Verify multiple results in parallel with controlled concurrency.
|
|
301
|
+
This method verifies multiple generated solutions against their
|
|
302
|
+
respective ground truths using parallel execution. It handles
|
|
303
|
+
timeouts, execution errors, and batch processing optimizations.
|
|
265
304
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
305
|
+
Args:
|
|
306
|
+
solutions (List[str]): A list of generated solutions to be
|
|
307
|
+
verified.
|
|
308
|
+
ground_truths (List[Optional[str]]): A list of expected outputs for
|
|
309
|
+
comparison. Each element corresponds to a solution.
|
|
310
|
+
raise_on_error (bool, optional): If True, raises an exception if
|
|
311
|
+
any verification fails. (default: :obj:`False`)
|
|
270
312
|
|
|
271
|
-
|
|
272
|
-
|
|
313
|
+
Returns:
|
|
314
|
+
List[VerificationResult]: A list of verification results, one per
|
|
315
|
+
input solution.
|
|
273
316
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
# Get current batch parameters from processor with defaults if not
|
|
286
|
-
# present
|
|
287
|
-
max_workers = getattr(
|
|
288
|
-
self._batch_processor, 'max_workers', self._max_parallel or 1
|
|
289
|
-
)
|
|
290
|
-
batch_size = getattr(
|
|
291
|
-
self._batch_processor, 'batch_size', self._initial_batch_size or 10
|
|
292
|
-
)
|
|
293
|
-
semaphore = asyncio.Semaphore(max(1, max_workers))
|
|
294
|
-
|
|
295
|
-
async def _verify_with_semaphore(
|
|
296
|
-
response: VerifierInput,
|
|
297
|
-
) -> VerificationResult:
|
|
298
|
-
start_time = time.time()
|
|
299
|
-
try:
|
|
300
|
-
async with semaphore:
|
|
301
|
-
verification_result = await self.verify(response)
|
|
302
|
-
processing_time = time.time() - start_time
|
|
303
|
-
success = verification_result.status == VerificationOutcome.SUCCESS
|
|
304
|
-
self._batch_processor.adjust_batch_size(success, processing_time)
|
|
305
|
-
return verification_result
|
|
306
|
-
except Exception as e:
|
|
307
|
-
processing_time = time.time() - start_time
|
|
308
|
-
self._batch_processor.adjust_batch_size(False, processing_time)
|
|
309
|
-
logger.error(f"Verification failed: {e!s}", exc_info=True)
|
|
310
|
-
return VerificationResult(
|
|
311
|
-
status=VerificationOutcome.ERROR,
|
|
312
|
-
result="",
|
|
313
|
-
error_message=str(e),
|
|
314
|
-
metadata={"error_type": type(e).__name__},
|
|
317
|
+
Raises:
|
|
318
|
+
RuntimeError: If any verification fails and `raise_on_error` is
|
|
319
|
+
True.
|
|
320
|
+
asyncio.TimeoutError: If verifications time out after maximum
|
|
321
|
+
retries.
|
|
322
|
+
"""
|
|
323
|
+
|
|
324
|
+
if not self._is_setup:
|
|
325
|
+
logger.warning(
|
|
326
|
+
f"{self.__class__.__name__} not set up, calling setup()"
|
|
315
327
|
)
|
|
328
|
+
await self.setup()
|
|
316
329
|
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
batch_results = await asyncio.gather(*verification_tasks)
|
|
326
|
-
all_results.extend(batch_results)
|
|
327
|
-
except Exception as e:
|
|
328
|
-
logger.error(f"Batch verification failed: {e!s}", exc_info=True)
|
|
329
|
-
if raise_on_error:
|
|
330
|
-
raise RuntimeError(f"Batch verification failed: {e!s}") from e
|
|
330
|
+
# Retrieve batch processing settings
|
|
331
|
+
max_workers = getattr(
|
|
332
|
+
self._batch_processor, 'max_workers', self._max_parallel or 1
|
|
333
|
+
)
|
|
334
|
+
batch_size = getattr(
|
|
335
|
+
self._batch_processor, 'batch_size', self._initial_batch_size or 10
|
|
336
|
+
)
|
|
337
|
+
semaphore = asyncio.Semaphore(max(1, max_workers))
|
|
331
338
|
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
+
async def _verify_with_semaphore(
|
|
340
|
+
solution: str, ground_truth: Optional[str]
|
|
341
|
+
) -> VerificationResult:
|
|
342
|
+
start_time = time.time()
|
|
343
|
+
try:
|
|
344
|
+
async with semaphore:
|
|
345
|
+
verification_result = await self.verify(
|
|
346
|
+
solution, ground_truth
|
|
347
|
+
)
|
|
348
|
+
processing_time = time.time() - start_time
|
|
349
|
+
success = (
|
|
350
|
+
verification_result.status == VerificationOutcome.SUCCESS
|
|
351
|
+
)
|
|
352
|
+
self._batch_processor.adjust_batch_size(
|
|
353
|
+
success, processing_time
|
|
354
|
+
)
|
|
355
|
+
return verification_result
|
|
356
|
+
except Exception as e:
|
|
357
|
+
processing_time = time.time() - start_time
|
|
358
|
+
self._batch_processor.adjust_batch_size(False, processing_time)
|
|
359
|
+
logger.error(f"Verification failed: {e!s}", exc_info=True)
|
|
360
|
+
return VerificationResult(
|
|
361
|
+
status=VerificationOutcome.ERROR,
|
|
362
|
+
result="",
|
|
363
|
+
error_message=str(e),
|
|
364
|
+
metadata={"error_type": type(e).__name__},
|
|
365
|
+
)
|
|
339
366
|
|
|
340
|
-
|
|
367
|
+
# Process in batches
|
|
368
|
+
all_results: List[VerificationResult] = []
|
|
369
|
+
for i in range(0, len(solutions), batch_size):
|
|
370
|
+
batch_solutions = solutions[i : i + batch_size]
|
|
371
|
+
batch_ground_truths = ground_truths[i : i + batch_size]
|
|
372
|
+
|
|
373
|
+
verification_tasks = [
|
|
374
|
+
_verify_with_semaphore(solution, ground_truth)
|
|
375
|
+
for solution, ground_truth in zip(
|
|
376
|
+
batch_solutions, batch_ground_truths
|
|
377
|
+
)
|
|
378
|
+
]
|
|
379
|
+
try:
|
|
380
|
+
batch_results = await asyncio.gather(*verification_tasks)
|
|
381
|
+
all_results.extend(batch_results)
|
|
382
|
+
except Exception as e:
|
|
383
|
+
logger.error(
|
|
384
|
+
f"Batch verification failed: {e!s}", exc_info=True
|
|
385
|
+
)
|
|
386
|
+
if raise_on_error:
|
|
387
|
+
raise RuntimeError(
|
|
388
|
+
f"Batch verification failed: {e!s}"
|
|
389
|
+
) from e
|
|
390
|
+
|
|
391
|
+
if raise_on_error and any(
|
|
392
|
+
r.status
|
|
393
|
+
in {VerificationOutcome.ERROR, VerificationOutcome.TIMEOUT}
|
|
394
|
+
for r in all_results
|
|
395
|
+
):
|
|
396
|
+
error_msg = "One or more verifications failed"
|
|
397
|
+
logger.error(error_msg)
|
|
398
|
+
raise RuntimeError(error_msg)
|
|
399
|
+
|
|
400
|
+
return all_results
|
camel/verifiers/models.py
CHANGED
|
@@ -18,18 +18,6 @@ from typing import Any, Dict, Optional
|
|
|
18
18
|
from pydantic import BaseModel, Field
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class VerifierInput(BaseModel):
|
|
22
|
-
r"""Structured input to the verifier"""
|
|
23
|
-
|
|
24
|
-
llm_response: str = Field(
|
|
25
|
-
description="The LLM response to be verified."
|
|
26
|
-
"Needs to be in a format that the verifier can handle."
|
|
27
|
-
)
|
|
28
|
-
ground_truth: Optional[str] = Field(
|
|
29
|
-
None, description="The ground truth data, if available."
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
|
|
33
21
|
class VerificationOutcome(Enum):
|
|
34
22
|
r"""Enum representing the status of a verification."""
|
|
35
23
|
|