camel-ai 0.2.21__py3-none-any.whl → 0.2.23a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/_types.py +41 -0
- camel/agents/_utils.py +188 -0
- camel/agents/chat_agent.py +556 -965
- camel/agents/knowledge_graph_agent.py +7 -1
- camel/agents/multi_hop_generator_agent.py +1 -1
- camel/configs/base_config.py +10 -13
- camel/configs/deepseek_config.py +4 -30
- camel/configs/gemini_config.py +5 -31
- camel/configs/openai_config.py +14 -32
- camel/configs/qwen_config.py +36 -36
- camel/datagen/self_improving_cot.py +79 -1
- camel/datagen/self_instruct/filter/instruction_filter.py +19 -3
- camel/datagen/self_instruct/self_instruct.py +7 -2
- camel/datasets/__init__.py +28 -0
- camel/datasets/base.py +969 -0
- camel/embeddings/openai_embedding.py +10 -1
- camel/environments/__init__.py +16 -0
- camel/environments/base.py +503 -0
- camel/extractors/__init__.py +16 -0
- camel/extractors/base.py +263 -0
- camel/interpreters/docker/Dockerfile +12 -0
- camel/interpreters/docker_interpreter.py +19 -1
- camel/interpreters/subprocess_interpreter.py +42 -17
- camel/loaders/__init__.py +2 -0
- camel/loaders/mineru_extractor.py +250 -0
- camel/memories/agent_memories.py +16 -1
- camel/memories/blocks/chat_history_block.py +10 -2
- camel/memories/blocks/vectordb_block.py +1 -0
- camel/memories/context_creators/score_based.py +20 -3
- camel/memories/records.py +10 -0
- camel/messages/base.py +8 -8
- camel/models/_utils.py +57 -0
- camel/models/aiml_model.py +48 -17
- camel/models/anthropic_model.py +41 -3
- camel/models/azure_openai_model.py +39 -3
- camel/models/base_model.py +132 -4
- camel/models/cohere_model.py +88 -11
- camel/models/deepseek_model.py +107 -63
- camel/models/gemini_model.py +133 -15
- camel/models/groq_model.py +72 -10
- camel/models/internlm_model.py +14 -3
- camel/models/litellm_model.py +9 -2
- camel/models/mistral_model.py +42 -5
- camel/models/model_manager.py +48 -3
- camel/models/moonshot_model.py +33 -4
- camel/models/nemotron_model.py +32 -3
- camel/models/nvidia_model.py +43 -3
- camel/models/ollama_model.py +139 -17
- camel/models/openai_audio_models.py +7 -1
- camel/models/openai_compatible_model.py +37 -3
- camel/models/openai_model.py +158 -46
- camel/models/qwen_model.py +61 -4
- camel/models/reka_model.py +53 -3
- camel/models/samba_model.py +209 -4
- camel/models/sglang_model.py +153 -14
- camel/models/siliconflow_model.py +16 -3
- camel/models/stub_model.py +46 -4
- camel/models/togetherai_model.py +38 -3
- camel/models/vllm_model.py +37 -3
- camel/models/yi_model.py +36 -3
- camel/models/zhipuai_model.py +38 -3
- camel/retrievers/__init__.py +3 -0
- camel/retrievers/hybrid_retrival.py +237 -0
- camel/toolkits/__init__.py +9 -2
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +4 -2
- camel/toolkits/base.py +22 -3
- camel/toolkits/code_execution.py +2 -0
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +38 -12
- camel/toolkits/function_tool.py +13 -0
- camel/toolkits/github_toolkit.py +5 -1
- camel/toolkits/google_maps_toolkit.py +2 -1
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +0 -3
- camel/toolkits/linkedin_toolkit.py +3 -2
- camel/toolkits/meshy_toolkit.py +3 -2
- camel/toolkits/mineru_toolkit.py +178 -0
- camel/toolkits/networkx_toolkit.py +240 -0
- camel/toolkits/notion_toolkit.py +2 -0
- camel/toolkits/openbb_toolkit.py +3 -2
- camel/toolkits/reddit_toolkit.py +11 -3
- camel/toolkits/retrieval_toolkit.py +6 -1
- camel/toolkits/semantic_scholar_toolkit.py +2 -1
- camel/toolkits/stripe_toolkit.py +8 -2
- camel/toolkits/sympy_toolkit.py +44 -1
- camel/toolkits/video_toolkit.py +2 -0
- camel/toolkits/whatsapp_toolkit.py +3 -2
- camel/toolkits/zapier_toolkit.py +191 -0
- camel/types/__init__.py +2 -2
- camel/types/agents/__init__.py +16 -0
- camel/types/agents/tool_calling_record.py +52 -0
- camel/types/enums.py +3 -0
- camel/types/openai_types.py +16 -14
- camel/utils/__init__.py +2 -1
- camel/utils/async_func.py +2 -2
- camel/utils/commons.py +114 -1
- camel/verifiers/__init__.py +23 -0
- camel/verifiers/base.py +340 -0
- camel/verifiers/models.py +82 -0
- camel/verifiers/python_verifier.py +202 -0
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/METADATA +273 -256
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/RECORD +106 -85
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/WHEEL +1 -1
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/LICENSE +0 -0
camel/utils/commons.py
CHANGED
|
@@ -19,6 +19,7 @@ import platform
|
|
|
19
19
|
import re
|
|
20
20
|
import socket
|
|
21
21
|
import subprocess
|
|
22
|
+
import threading
|
|
22
23
|
import time
|
|
23
24
|
import zipfile
|
|
24
25
|
from functools import wraps
|
|
@@ -306,10 +307,55 @@ def api_keys_required(
|
|
|
306
307
|
if not value or value.strip() == "":
|
|
307
308
|
missing_keys.append(env_var_name)
|
|
308
309
|
|
|
310
|
+
key_way = "the official website"
|
|
311
|
+
if env_var_name == 'ANTHROPIC_API_KEY':
|
|
312
|
+
key_way = (
|
|
313
|
+
"https://docs.anthropic.com/zh-CN/api/getting-started"
|
|
314
|
+
)
|
|
315
|
+
elif env_var_name == 'AIML_API_KEY':
|
|
316
|
+
key_way = "https://aimlapi.com/"
|
|
317
|
+
elif env_var_name == 'COHERE_API_KEY':
|
|
318
|
+
key_way = "https://cohere.com/"
|
|
319
|
+
elif env_var_name == 'DEEPSEEK_API_KEY':
|
|
320
|
+
key_way = "https://www.deepseek.com/"
|
|
321
|
+
elif env_var_name == 'AZURE_OPENAI_API_KEY':
|
|
322
|
+
key_way = "https://portal.azure.com/"
|
|
323
|
+
elif env_var_name == 'OPENAI_API_KEY':
|
|
324
|
+
key_way = "https://platform.openai.com/docs/overview"
|
|
325
|
+
elif env_var_name == 'FISHAUDIO_API_KEY':
|
|
326
|
+
key_way = "https://fish.audio/"
|
|
327
|
+
elif env_var_name == 'GEMINI_API_KEY':
|
|
328
|
+
key_way = "https://gemini.google.com/"
|
|
329
|
+
elif env_var_name == 'INTERNLM_API_KEY':
|
|
330
|
+
key_way = "https://internlm-chat.intern-ai.org.cn/puyu/api/v1"
|
|
331
|
+
elif env_var_name == 'GROQ_API_KEY':
|
|
332
|
+
key_way = "https://api.groq.com/openai/v1"
|
|
333
|
+
elif env_var_name == 'MISTRAL_API_KEY':
|
|
334
|
+
key_way = "https://mistral.ai/"
|
|
335
|
+
elif env_var_name == 'MOONSHOT_API_KEY':
|
|
336
|
+
key_way = "https://api.moonshot.cn/v1"
|
|
337
|
+
elif env_var_name == 'NVIDIA_API_KEY':
|
|
338
|
+
key_way = "https://integrate.api.nvidia.com/"
|
|
339
|
+
elif env_var_name == 'OPENAI_COMPATIBILIY_API_KEY':
|
|
340
|
+
key_way = "https://platform.openai.com/docs/overview"
|
|
341
|
+
elif env_var_name == 'QWEN_API_KEY':
|
|
342
|
+
key_way = "https://tongyi.aliyun.com/"
|
|
343
|
+
elif env_var_name == 'REKA_API_KEY':
|
|
344
|
+
key_way = "https://docs.reka.ai/quick-start"
|
|
345
|
+
elif env_var_name == 'SAMBA_API_KEY':
|
|
346
|
+
key_way = "https://community.sambanova.ai/t/looking-for-api-key-and-url-for-sambanova/576"
|
|
347
|
+
elif env_var_name == 'TOGETHER_API_KEY':
|
|
348
|
+
key_way = "https://docs.together.ai/docs/quickstart"
|
|
349
|
+
elif env_var_name == 'YI_API_KEY':
|
|
350
|
+
key_way = "https://platform.lingyiwanwu.com/docs"
|
|
351
|
+
elif env_var_name == 'ZHIPUAI_API_KEY':
|
|
352
|
+
key_way = "https://www.zhipuai.cn/"
|
|
353
|
+
|
|
309
354
|
if missing_keys:
|
|
310
355
|
raise ValueError(
|
|
311
356
|
"Missing or empty required API keys in "
|
|
312
|
-
f"environment variables: {', '.join(missing_keys)}"
|
|
357
|
+
f"environment variables: {', '.join(missing_keys)}.\n"
|
|
358
|
+
f"You can obtain the API key from {key_way}"
|
|
313
359
|
)
|
|
314
360
|
return func(*args, **kwargs)
|
|
315
361
|
|
|
@@ -905,3 +951,70 @@ def generate_prompt_for_structured_output(
|
|
|
905
951
|
{user_prompt}
|
|
906
952
|
"""
|
|
907
953
|
return final_prompt
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
def with_timeout(timeout=None):
    r"""Decorator that adds timeout functionality to functions.

    Runs the decorated function in a worker thread and waits at most the
    effective timeout for it to finish. If the function is still running
    when the timeout expires, a timeout message string is returned instead
    of the function's result. The worker thread is not interrupted; it
    keeps running in the background until the function returns.

    Args:
        timeout (float, optional): The timeout duration in seconds. If None,
            will try to get timeout from the ``timeout`` attribute of the
            first positional argument (e.g. ``self`` for methods). If no
            timeout can be determined, the function is called directly in
            the calling thread. (default: :obj:`None`)

    Returns:
        Callable: The decorator, or the already-decorated function when
            used without parentheses (``@with_timeout``).

    Raises:
        BaseException: Whatever the decorated function raises before the
            timeout expires is re-raised in the calling thread.

    Example:
        >>> @with_timeout(5)
        ... def my_function():
        ...     return "Success"
        >>> my_function()
        'Success'

        >>> class MyClass:
        ...     timeout = 5
        ...     @with_timeout()
        ...     def my_method(self):
        ...         return "Success"
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Determine the effective timeout value
            effective_timeout = timeout
            if effective_timeout is None and args:
                effective_timeout = getattr(args[0], 'timeout', None)

            # If no timeout value is provided, execute function normally
            if effective_timeout is None:
                return func(*args, **kwargs)

            # Containers to carry the outcome of the call out of the thread
            result_container = []
            exception_container = []

            def target():
                # Capture any exception so it can be re-raised in the
                # calling thread; otherwise it would be lost in the worker
                # and the caller would hit an IndexError on the empty
                # result container.
                try:
                    result_container.append(func(*args, **kwargs))
                except BaseException as e:
                    exception_container.append(e)

            # Start the function in a new thread
            thread = threading.Thread(target=target)
            thread.start()
            thread.join(effective_timeout)

            # Check if the thread is still alive after the timeout
            if thread.is_alive():
                return (
                    f"Function `{func.__name__}` execution timed out, "
                    f"exceeded {effective_timeout} seconds."
                )

            if exception_container:
                raise exception_container[0]

            return result_container[0]

        return wrapper

    # Handle both @with_timeout and @with_timeout() usage
    if callable(timeout):
        # If timeout is passed as a function, apply it to the decorator
        func, timeout = timeout, None
        return decorator(func)

    return decorator
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
from .base import BaseVerifier
|
|
15
|
+
from .models import VerificationOutcome, VerifierInput
|
|
16
|
+
from .python_verifier import PythonVerifier
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"BaseVerifier",
|
|
20
|
+
"VerificationOutcome",
|
|
21
|
+
"VerifierInput",
|
|
22
|
+
"PythonVerifier",
|
|
23
|
+
]
|
camel/verifiers/base.py
ADDED
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
import asyncio
|
|
15
|
+
import time
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
from typing import List, Optional
|
|
18
|
+
|
|
19
|
+
from camel.logger import get_logger
|
|
20
|
+
from camel.utils import BatchProcessor
|
|
21
|
+
|
|
22
|
+
from .models import (
|
|
23
|
+
VerificationOutcome,
|
|
24
|
+
VerificationResult,
|
|
25
|
+
VerifierInput,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
logger = get_logger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class BaseVerifier(ABC):
    r"""Base class for all verifiers.

    Subclasses implement :meth:`_setup`, :meth:`_cleanup` and
    :meth:`_verify_implementation`; this class wraps them with setup-state
    tracking, retries, optional timeouts and batched execution.

    Example:
        ```python
        verifier = MyVerifier()
        await verifier.setup()
        result = await verifier.verify(response)
        await verifier.cleanup()
        ```

    Key Features:
    - Async verification with retry logic
    - Comprehensive error handling and logging
    - Configurable batch processing
    - Resource monitoring for adaptive scaling
    """

    def __init__(
        self,
        max_parallel: Optional[int] = None,
        timeout: Optional[float] = None,
        max_retries: int = 3,
        retry_delay: float = 1.0,
        initial_batch_size: Optional[int] = None,
        cpu_threshold: float = 80.0,
        memory_threshold: float = 85.0,
        **kwargs,
    ):
        r"""Initialize the verifier with configuration parameters.

        Args:
            max_parallel: Maximum number of parallel verifications. If None,
                a single worker is used unless the batch processor exposes
                its own ``max_workers``. (default: :obj:`None`)
            timeout: Timeout in seconds for each verification attempt. A
                falsy value (``None`` or ``0``) disables the timeout.
                (default: :obj:`None`)
            max_retries: Maximum number of verification attempts. Values
                below 1 are treated as 1. (default: :obj:`3`)
            retry_delay: Delay between retries in seconds. (default:
                :obj:`1.0`)
            initial_batch_size: Initial size for batch processing. If None,
                defaults to 10. (default: :obj:`None`)
            cpu_threshold: CPU usage percentage threshold for scaling down.
                (default: :obj:`80.0`)
            memory_threshold: Memory usage percentage threshold for scaling
                down. (default: :obj:`85.0`)
            **kwargs: Additional verifier parameters. They are accepted
                but not used by this base class.
        """
        self._is_setup: bool = False
        self._max_parallel: Optional[int] = max_parallel
        self._timeout: Optional[float] = timeout
        self._max_retries: int = max_retries
        self._retry_delay: float = retry_delay
        self._initial_batch_size: Optional[int] = initial_batch_size
        self._cpu_threshold: float = cpu_threshold
        self._memory_threshold: float = memory_threshold
        self._batch_processor: BatchProcessor = BatchProcessor()

    async def setup(self) -> None:
        r"""Set up the verifier with necessary resources.

        Initializes:
        1. A fresh batch processor
        2. Any verifier-specific resources (via :meth:`_setup`)

        Calling this method on an already set-up verifier is a no-op.

        Raises:
            RuntimeError: If setup fails or resources cannot be initialized.
        """
        if self._is_setup:
            logger.debug(f"{self.__class__.__name__} already initialized")
            return

        try:
            # NOTE(review): these values are only logged here; they are not
            # passed to BatchProcessor(), whose constructor is not visible
            # from this file - confirm whether it should receive them.
            batch_size = max(1, self._initial_batch_size or 10)
            max_parallel = max(1, self._max_parallel or 1)
            self._batch_processor = BatchProcessor()

            logger.info(
                f"{self.__class__.__name__} initialized with "
                f"batch_size={batch_size}, max_parallel={max_parallel}"
            )

            await self._setup()
            self._is_setup = True

        except Exception as e:
            error_msg = (
                f"Failed to initialize {self.__class__.__name__}: {e!s}"
            )
            logger.error(error_msg, exc_info=True)
            # `cleanup()` returns early while `_is_setup` is False, which is
            # always the case on this path, so release any partially
            # initialized resources through `_cleanup()` directly. This is
            # best-effort: a cleanup failure must not mask the setup error.
            try:
                await self._cleanup()
            except Exception as cleanup_error:
                logger.error(
                    f"Cleanup after failed setup of "
                    f"{self.__class__.__name__} failed: {cleanup_error!s}",
                    exc_info=True,
                )
            raise RuntimeError(error_msg) from e

    @abstractmethod
    async def _setup(self) -> None:
        r"""Implement verifier-specific setup logic."""
        pass

    async def cleanup(self) -> None:
        r"""Clean up verifier resources.

        Ensures:
        1. Batch processor is reset
        2. Verifier-specific resources are released (via :meth:`_cleanup`)
        3. The verifier is marked as not set up, even if cleanup fails

        Calling this method on a verifier that is not set up is a no-op.

        Raises:
            RuntimeError: If cleanup fails.
        """
        if not self._is_setup:
            return

        try:
            self._batch_processor = BatchProcessor()
            await self._cleanup()
            logger.info(f"{self.__class__.__name__} cleaned up successfully")

        except Exception as e:
            error_msg = f"Failed to cleanup {self.__class__.__name__}: {e!s}"
            logger.error(error_msg, exc_info=True)
            raise RuntimeError(error_msg) from e

        finally:
            self._is_setup = False

    @abstractmethod
    async def _cleanup(self) -> None:
        r"""Implement verifier-specific cleanup logic."""
        pass

    async def verify(self, result: VerifierInput) -> VerificationResult:
        r"""Perform verification with full error handling.

        Calls :meth:`_verify_implementation`, retrying on timeouts and
        errors. Failures are reported through the returned result's
        ``status`` rather than raised. If the verifier has not been set up
        yet, :meth:`setup` is called first.

        Args:
            result: The response to verify.

        Returns:
            VerificationResult: Structured result containing:
                - status: SUCCESS/FAILURE/ERROR/TIMEOUT
                - result: Verification outcome description
                - duration: Time taken for verification, including
                  retries and retry delays
                - metadata: Additional details, including the 1-based
                  ``attempt`` number that produced the result
                - error_message: Error description if applicable

        Raises:
            RuntimeError: If the verifier is not set up and the implicit
                call to :meth:`setup` fails.
        """
        if not self._is_setup:
            logger.warning(
                f"{self.__class__.__name__} not set up, calling setup()"
            )
            await self.setup()

        # Always make at least one attempt, even when `max_retries` was
        # configured as zero or a negative number.
        max_attempts = max(1, self._max_retries)
        attempt = 0
        # Monotonic clock: durations must not be affected by wall-clock
        # adjustments.
        start_time = time.monotonic()

        while attempt < max_attempts:
            try:
                if self._timeout:
                    verification_result = await asyncio.wait_for(
                        self._verify_implementation(result),
                        timeout=self._timeout,
                    )
                else:
                    verification_result = await self._verify_implementation(
                        result
                    )

                verification_result.duration = time.monotonic() - start_time
                verification_result.metadata["attempt"] = attempt + 1
                return verification_result

            except asyncio.TimeoutError:
                attempt += 1
                if attempt >= max_attempts:
                    return VerificationResult(
                        status=VerificationOutcome.TIMEOUT,
                        result="",
                        error_message="Verification timed out "
                        "after all retries.",
                        duration=time.monotonic() - start_time,
                        metadata={"attempt": attempt},
                    )
                logger.warning(
                    f"Verification timeout on attempt {attempt}, retrying..."
                )
                await asyncio.sleep(self._retry_delay)

            except Exception as e:
                attempt += 1
                if attempt >= max_attempts:
                    return VerificationResult(
                        status=VerificationOutcome.ERROR,
                        result="",
                        error_message=f"Verification failed: {e!s}",
                        duration=time.monotonic() - start_time,
                        metadata={"attempt": attempt},
                    )
                logger.warning(
                    f"Verification error on attempt {attempt}: {e!s}, "
                    f"retrying..."
                )
                await asyncio.sleep(self._retry_delay)

        # Defensive fallback; every loop iteration above returns or retries.
        return VerificationResult(
            status=VerificationOutcome.ERROR,
            result="",
            error_message="Unexpected code path reached",
            duration=time.monotonic() - start_time,
            metadata={"attempt": attempt},
        )

    @abstractmethod
    async def _verify_implementation(
        self, result: VerifierInput
    ) -> VerificationResult:
        r"""Implement the actual verification logic.

        Args:
            result: The response to verify.

        Returns:
            VerificationResult: Containing the verification outcome.

        Raises:
            NotImplementedError: Must be implemented in subclasses.
        """
        raise NotImplementedError(
            "Subclasses must implement _verify_implementation()"
        )

    async def verify_batch(
        self, results: List[VerifierInput], raise_on_error: bool = False
    ) -> List[VerificationResult]:
        r"""Verify multiple results in parallel with controlled concurrency.

        Inputs are processed in consecutive batches; within a batch, at
        most ``max_workers`` verifications run concurrently. If the
        verifier has not been set up yet, :meth:`setup` is called first.

        Args:
            results: List of responses to verify.
            raise_on_error: Whether to raise an exception if any verification
                ends with an ERROR or TIMEOUT status. (default:
                :obj:`False`)

        Returns:
            List[VerificationResult]: One for each input response, in input
                order.

        Raises:
            RuntimeError: If any verification fails and raise_on_error is
                True, or if the implicit call to :meth:`setup` fails.
        """
        if not self._is_setup:
            logger.warning(
                f"{self.__class__.__name__} not set up, calling setup()"
            )
            await self.setup()

        # Get current batch parameters from processor with defaults if not
        # present
        max_workers = getattr(
            self._batch_processor, 'max_workers', self._max_parallel or 1
        )
        batch_size = getattr(
            self._batch_processor,
            'batch_size',
            self._initial_batch_size or 10,
        )
        semaphore = asyncio.Semaphore(max(1, max_workers))

        async def _verify_with_semaphore(
            response: VerifierInput,
        ) -> VerificationResult:
            # The measured time includes waiting for the semaphore.
            start_time = time.monotonic()
            try:
                async with semaphore:
                    verification_result = await self.verify(response)
                processing_time = time.monotonic() - start_time
                success = (
                    verification_result.status == VerificationOutcome.SUCCESS
                )
                self._batch_processor.adjust_batch_size(
                    success, processing_time
                )
                return verification_result
            except Exception as e:
                processing_time = time.monotonic() - start_time
                self._batch_processor.adjust_batch_size(
                    False, processing_time
                )
                logger.error(f"Verification failed: {e!s}", exc_info=True)
                return VerificationResult(
                    status=VerificationOutcome.ERROR,
                    result="",
                    error_message=str(e),
                    metadata={"error_type": type(e).__name__},
                )

        # Process in batches
        all_results: List[VerificationResult] = []
        for i in range(0, len(results), batch_size):
            batch = results[i : i + batch_size]
            verification_tasks = [
                _verify_with_semaphore(result) for result in batch
            ]
            try:
                batch_results = await asyncio.gather(*verification_tasks)
                all_results.extend(batch_results)
            except Exception as e:
                logger.error(
                    f"Batch verification failed: {e!s}", exc_info=True
                )
                if raise_on_error:
                    raise RuntimeError(
                        f"Batch verification failed: {e!s}"
                    ) from e

        if raise_on_error and any(
            r.status
            in {VerificationOutcome.ERROR, VerificationOutcome.TIMEOUT}
            for r in all_results
        ):
            error_msg = "One or more verifications failed"
            logger.error(error_msg)
            raise RuntimeError(error_msg)

        return all_results
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from typing import Any, Dict, Optional
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel, Field
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class VerifierInput(BaseModel):
    r"""Structured input to the verifier."""

    # Text produced by the LLM that the verifier will check.
    llm_response: str = Field(
        # The trailing space keeps the two sentences apart once the
        # adjacent string literals are concatenated.
        description="The LLM response to be verified. "
        "Needs to be in a format that the verifier can handle."
    )
    # Optional reference value; None when no ground truth is supplied.
    ground_truth: Optional[str] = Field(
        None, description="The ground truth data, if available."
    )
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class VerificationOutcome(Enum):
    r"""Possible end states of a single verification."""

    SUCCESS = "success"
    FAILURE = "failure"
    ERROR = "error"
    TIMEOUT = "timeout"

    def __bool__(self):
        r"""Return True for SUCCESS and False for every other member."""
        return self.name == "SUCCESS"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class VerificationResult(BaseModel):
    r"""Outcome record produced by one verification."""

    # Required: final status of the verification.
    status: VerificationOutcome = Field(
        description="Status of the verification",
    )
    # Required: textual verification result.
    result: str = Field(
        description="Verification result",
    )
    # Defaults to 0.0 when not provided.
    duration: float = Field(
        description="Duration of verification in seconds",
        default=0.0,
    )
    # Filled with `datetime.now()` at construction time when omitted.
    timestamp: datetime = Field(
        description="When the verification was performed",
        default_factory=datetime.now,
    )
    # A fresh dict per instance when omitted.
    metadata: Dict[str, Any] = Field(
        description="Additional metadata about the verification",
        default_factory=dict,
    )
    # None unless an error message is supplied.
    error_message: Optional[str] = Field(
        description="Error message if verification failed",
        default=None,
    )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class VerifierConfig(BaseModel):
    r"""Settings that control how a verifier behaves."""

    enabled: bool = Field(
        default=True,
        description="Whether verification is enabled",
    )
    strict_mode: bool = Field(
        default=False,
        description="Whether to fail on any validation error",
    )
    timeout: Optional[float] = Field(
        default=None,
        description="Verification timeout in seconds",
    )
    max_retries: int = Field(
        default=3,
        description="Maximum number of retry attempts",
    )
    retry_delay: float = Field(
        default=1.0,
        description="Delay between retries in seconds",
    )
|