camel-ai 0.2.29__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/datasets/__init__.py +7 -5
- camel/datasets/base_generator.py +335 -0
- camel/datasets/models.py +61 -0
- camel/datasets/static_dataset.py +346 -0
- camel/embeddings/openai_compatible_embedding.py +4 -4
- camel/environments/__init__.py +11 -2
- camel/environments/models.py +111 -0
- camel/environments/multi_step.py +271 -0
- camel/environments/single_step.py +293 -0
- camel/logger.py +56 -0
- camel/models/openai_compatible_model.py +4 -2
- camel/toolkits/browser_toolkit.py +59 -1
- camel/toolkits/search_toolkit.py +70 -0
- camel/utils/commons.py +1 -1
- {camel_ai-0.2.29.dist-info → camel_ai-0.2.30.dist-info}/METADATA +2 -1
- {camel_ai-0.2.29.dist-info → camel_ai-0.2.30.dist-info}/RECORD +19 -15
- camel/datasets/base.py +0 -639
- camel/environments/base.py +0 -509
- {camel_ai-0.2.29.dist-info → camel_ai-0.2.30.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.29.dist-info → camel_ai-0.2.30.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
17
|
+
|
|
18
|
+
from camel.extractors.base import BaseExtractor
|
|
19
|
+
from camel.logger import get_logger
|
|
20
|
+
|
|
21
|
+
from .models import Action, Observation, StepResult
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MultiStepEnv(ABC):
    r"""A multi-step environment for reinforcement learning with LLMs.

    This base class drives the episode life cycle (``setup`` -> ``reset``
    -> repeated ``step`` -> ``close``). Subclasses provide the state
    transition, observation, reward and termination hooks declared as
    abstract methods below.
    """

    def __init__(
        self,
        extractor: BaseExtractor,
        max_steps: Optional[int] = None,
        **kwargs,
    ) -> None:
        r"""Initialize the environment.

        Args:
            extractor: Extractor to process LLM responses.
            max_steps: Maximum steps per episode. A falsy value (``None``
                or ``0``) disables the step limit.
            **kwargs: Additional environment parameters; exposed through
                the ``metadata`` property.
        """
        self.extractor = extractor
        self.max_steps = max_steps
        self._metadata = kwargs

        # State tracking
        self._is_setup: bool = False
        self._current_step: int = 0
        self._episode_ended: bool = False
        # NOTE: the subclass hook runs during base-class construction, so
        # it must not rely on attributes the subclass sets afterwards.
        self._state: Dict[str, Any] = self._get_initial_state()
        self._last_observation: Optional[Observation] = None
        self._episode_history: List[Tuple[Observation, Action]] = []

    async def setup(self) -> None:
        r"""Set up the environment by initializing the extractor.

        This method sets up the extractor and then calls the
        subclass-specific ``_setup`` hook. It is a no-op when the
        environment is already set up.

        Raises:
            Exception: If setup fails due to an internal error. The error
                is logged and re-raised unchanged.
        """
        if self._is_setup:
            return

        try:
            await self.extractor.setup()
            await self._setup()
            self._is_setup = True
            logger.info('Environment setup completed successfully')
        except Exception as e:
            logger.error(f'Failed to setup environment: {e}')
            raise

    @abstractmethod
    async def _setup(self) -> None:
        r"""Subclass hook awaited by ``setup`` after the extractor setup."""
        pass

    async def close(self) -> None:
        r"""Clean up and close all resources used by the environment.

        This method cleans up the extractor, calls the internal
        ``_close`` hook that is implemented in any MultiStepEnv, and
        marks the environment as no longer set up. It is a no-op when the
        environment was never set up.

        Raises:
            Exception: If an error occurs while closing the environment.
                The error is logged and re-raised unchanged.
        """
        if not self._is_setup:
            return

        try:
            await self.extractor.cleanup()

            await self._close()

            self._is_setup = False
            logger.info('Environment teardown completed successfully')
        except Exception as e:
            logger.error(f'Failed to teardown environment: {e}')
            raise

    @abstractmethod
    async def _close(self) -> None:
        r"""Subclass hook awaited by ``close`` after extractor cleanup."""
        pass

    async def reset(self) -> Observation:
        r"""Reset the environment to an initial state.

        Sets the environment up first if that has not happened yet.

        Returns:
            Observation: The initial observation for the episode.

        Raises:
            RuntimeError: If we fail to get the initial observation.
        """
        if not self._is_setup:
            await self.setup()

        # Reset state
        self._current_step = 0
        self._episode_ended = False
        self._episode_history = []
        self._state = self._get_initial_state()

        # Get initial observation
        observation = self._get_next_observation()
        if observation is None:
            raise RuntimeError("Failed to get initial observation")

        self._last_observation = observation

        return observation

    async def step(self, action: Action) -> StepResult:
        r"""Take a step in the environment using the given action.

        This method updates the environment state based on the LLM's
        response, computes rewards, checks if the episode is done, and
        based on that gets the next or final observation.

        If the step limit has already been reached, a terminal
        ``StepResult`` with zero reward is returned before any other
        check is performed.

        Args:
            action (Action): The action containing the LLM response.

        Returns:
            StepResult containing next observation, total reward, a
            dictionary of rewards, done flag, and info.

        Raises:
            RuntimeError: If the environment is not set up, the episode
                has ended, or there is no valid current observation.
        """
        if self.max_steps and self._current_step >= self.max_steps:
            return StepResult(
                observation=self._get_terminal_observation(),
                reward=0,
                rewards_dict={},
                done=True,
                info={"reason": "max_steps_reached"},
            )

        if not self._is_setup:
            raise RuntimeError("Environment not set up. Call setup() first.")
        if self._episode_ended:
            raise RuntimeError("Episode has ended. Call reset() first.")
        if self._last_observation is None:
            raise RuntimeError("No current observation. Call reset() first.")

        self._current_step += 1

        current_obs: Observation = self._last_observation
        self._episode_history.append((current_obs, action))

        # Update the environment state based on the action
        await self._update_state(action)

        # Compute rewards
        total_reward, rewards_dict = await self.compute_reward()

        # Check termination
        done = self.is_done()

        # Get next observation based on the updated state
        next_obs = (
            self._get_terminal_observation()
            if done
            else self._get_next_observation()
        )

        self._last_observation = next_obs
        self._episode_ended = done

        # The extractor's ``extract`` is a coroutine function (it is
        # awaited by SingleStepEnv as well); without ``await`` the info
        # dict would carry an un-awaited coroutine object instead of the
        # extraction result.
        extraction_result = await self.extractor.extract(action.llm_response)

        return StepResult(
            observation=next_obs,
            reward=total_reward,
            rewards_dict=rewards_dict,
            done=done,
            info={
                "extraction_result": extraction_result,
                "step": self._current_step,
                "state": self._state,  # Updated state
            },
        )

    @abstractmethod
    def _get_initial_state(self) -> Dict[str, Any]:
        r"""Return the state used at construction and on every reset."""
        pass

    @abstractmethod
    async def _update_state(self, action: Action) -> None:
        r"""Update the environment state for the given action.

        Awaited by ``step`` after the (observation, action) pair has been
        appended to the episode history.
        """
        pass

    @abstractmethod
    def _get_next_observation(self) -> Observation:
        r"""Return the observation for a non-terminal state."""
        pass

    @abstractmethod
    def _get_terminal_observation(self) -> Observation:
        r"""Return the observation emitted once the episode is done."""
        pass

    @abstractmethod
    async def compute_reward(
        self,
    ) -> Tuple[float, Dict[str, float]]:
        r"""Compute the reward for the current step.

        Returns:
            Tuple[float, Dict[str, float]]: The total reward and a
                dictionary of individual reward components.
        """
        pass

    def is_done(self) -> bool:
        r"""Check if the episode should terminate.

        This function terminates the episode if the maximum number of
        steps is reached or if any other terminating criterion is met.

        Returns:
            bool: A boolean flag.
        """
        # After too many steps
        if self.max_steps and self._current_step >= self.max_steps:
            return True

        # Further termination logic can be implemented in subclass
        if self._is_done():
            return True

        return False

    @abstractmethod
    def _is_done(self) -> bool:
        r"""Subclass-specific termination criterion used by ``is_done``."""
        pass

    @property
    def metadata(self) -> Dict[str, Any]:
        r"""Retrieve the metadata of the environment.

        This provides additional parameters and configuration details.

        Returns:
            Dict[str, Any]: A copy of the environment's metadata.
        """
        return self._metadata.copy()

    @property
    def current_step(self) -> int:
        r"""Get the current step number.

        Returns:
            int: The number of the step we are currently in.
        """
        return self._current_step
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from abc import abstractmethod
|
|
17
|
+
from typing import Any, Dict, Optional, Tuple, Union
|
|
18
|
+
|
|
19
|
+
from camel.datasets import BaseGenerator, DataPoint, StaticDataset
|
|
20
|
+
from camel.extractors.base import BaseExtractor
|
|
21
|
+
from camel.logger import get_logger
|
|
22
|
+
from camel.verifiers.base import (
|
|
23
|
+
BaseVerifier,
|
|
24
|
+
VerificationResult,
|
|
25
|
+
)
|
|
26
|
+
from camel.verifiers.models import (
|
|
27
|
+
VerifierInput,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
from .models import Action, Observation, StepResult
|
|
31
|
+
|
|
32
|
+
logger = get_logger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SingleStepEnv:
    r"""A single-step environment for reinforcement learning with LLMs.

    Key Features:
    - Samples questions from a dataset and asks the LLM
    - Extracts verifiable information from model responses.
    - Verifies extracted responses against ground truth.
    - Computes and assigns rewards based on correctness.
    - Supports async setup, teardown, and cleanup of resources.

    This class is intended as a foundation for RL experiments involving
    LLM-based policies, ensuring structured interactions between model
    actions and verification mechanisms.
    """

    # Observation returned by ``step``; every episode ends after one step.
    PLACEHOLDER_OBS = Observation(
        question="Episode ended. This is just a placeholder."
    )

    # Reward assigned to the "correctness" component on success.
    ACCURACY_REWARD = 10

    def __init__(
        self,
        dataset: Union[StaticDataset, BaseGenerator],
        verifier: BaseVerifier,
        extractor: BaseExtractor,
        **kwargs,
    ) -> None:
        r"""Initialize the environment.

        Args:
            dataset: Dataset to sample questions from.
            verifier: Verifier to check responses.
            extractor: Extractor to process LLM responses.
            **kwargs: Additional environment parameters; exposed through
                the ``metadata`` property.
        """
        self.dataset = dataset
        self.verifier = verifier
        self.extractor = extractor
        self._metadata = kwargs

        # State tracking
        self._is_setup: bool = False
        self._state: Optional[DataPoint] = None
        self._episode_ended: bool = False

    async def setup(self) -> None:
        r"""Set up the environment by initializing the verifier and extractor.

        This method ensures that the environment is ready for interaction.
        It sets up the verifier first and the extractor second, and is a
        no-op when the environment is already set up.

        Raises:
            Exception: If setup fails due to an internal error. The error
                is logged and re-raised unchanged.
        """
        if self._is_setup:
            return

        try:
            await self.verifier.setup()
            await self.extractor.setup()

            self._is_setup = True
            logger.info('Environment setup completed successfully')
        except Exception as e:
            logger.error(f'Failed to setup environment: {e}')
            raise

    async def close(self) -> None:
        r"""Clean up and close all resources used by the environment.

        This method shuts down the verifier and extractor, resets the
        internal state, and ensures that the environment is properly
        closed. It is a no-op when the environment was never set up.

        Raises:
            Exception: If an error occurs while closing the environment.
                The error is logged and re-raised unchanged.
        """
        if not self._is_setup:
            return

        try:
            # The flag is cleared before cleanup, so the environment
            # counts as "not set up" even if a cleanup call below raises.
            self._is_setup = False
            await self.verifier.cleanup()
            await self.extractor.cleanup()
            self._state = None
            self._episode_ended = False
            logger.info('Environment closed successfully')
        except Exception as e:
            logger.error(f'Failed to close environment: {e}')
            raise

    async def reset(self) -> Observation:
        r"""Reset the environment and start a new episode.

        This method samples a new data point from the dataset and returns
        the initial observation. The environment is set up first if that
        has not happened yet.

        Returns:
            Observation: The first observation of the new episode,
                including the question.

        Raises:
            Exception: If the implicit call to ``setup`` fails.
        """
        if not self._is_setup:
            await self.setup()

        self._episode_ended = False

        # Sample a datapoint
        self._state = self.dataset.sample()

        observation = Observation(
            question=self._state.question, context={}, metadata={}
        )

        return observation

    async def step(self, action: Action) -> StepResult:
        r"""Take a step in the environment using the given action.

        This method processes the LLM response, extracts verifiable
        content, verifies correctness, computes rewards, and ends the
        episode.

        Args:
            action (Action): The action containing the LLM response to
                evaluate.

        Returns:
            StepResult: Contains the next observation (placeholder), total
                reward, reward breakdown, completion flag, and additional
                information.

        Raises:
            RuntimeError: If the environment is not set up, the episode
                has ended, there is no valid current observation, or
                nothing could be extracted from the LLM response.
        """
        if not self._is_setup:
            raise RuntimeError("Environment not set up. Call setup() first.")
        if self._episode_ended:
            raise RuntimeError("Episode has ended. Call reset() first.")
        if self._state is None:
            raise RuntimeError("No current observation. Call reset() first.")

        # extract verifiable part from llm response
        extraction_result = await self.extractor.extract(action.llm_response)

        if not extraction_result:
            raise RuntimeError(f"Couldn't extract from {action.llm_response}")

        # verify the extracted
        verification_result = await self.verifier.verify(
            VerifierInput(
                llm_response=extraction_result,
                ground_truth=self._state.final_answer,
            )
        )

        # compute rewards
        total_reward, rewards_dict = await self._compute_reward(
            action, extraction_result, verification_result
        )

        self._episode_ended = True

        return StepResult(
            observation=self.PLACEHOLDER_OBS,
            reward=total_reward,
            rewards_dict=rewards_dict,
            done=True,
            info={
                "extraction_result": extraction_result,
                "verification_result": verification_result,
                "state": self._state,
            },
        )

    async def _compute_reward(
        self,
        action: Action,
        extraction_result: str,
        verification_result: VerificationResult,
    ) -> Tuple[float, Dict[str, float]]:
        r"""Compute reward scores based on verification results.

        This method calculates the reward based on correctness and any
        additional custom reward components.

        Args:
            action (Action): The action taken in the environment.
            extraction_result (str): The extracted verifiable content from
                the LLM response.
            verification_result (VerificationResult): The result of
                verifying the extracted response.

        Returns:
            Tuple[float, Dict[str, float]]: A tuple containing:
                - Total reward (float)
                - Dictionary of individual reward components.

        Raises:
            Exception: If an error occurs while computing rewards.
        """
        rewards: Dict[str, float] = {}

        rewards["correctness"] = (
            self.ACCURACY_REWARD if verification_result.status else 0.0
        )

        further_rewards = await self._compute_custom_reward(
            action, extraction_result, verification_result
        )

        # This class is not an ABC, so ``@abstractmethod`` on
        # ``_compute_custom_reward`` is not enforced. A hook that returns
        # ``None`` must not break the merge. Custom components are merged
        # last, so they override "correctness" on a key clash.
        if further_rewards:
            rewards = rewards | further_rewards

        return sum(rewards.values()), rewards

    @abstractmethod
    async def _compute_custom_reward(
        self,
        action: Action,
        extraction_result: str,
        verification_result: VerificationResult,
    ) -> Dict[str, float]:
        r"""Compute additional custom reward components.

        This method should be implemented by subclasses to define
        domain-specific reward calculations. The default implementation
        contributes no additional rewards.

        Args:
            action (Action): The action taken in the environment.
            extraction_result (str): The extracted verifiable content from
                the LLM response.
            verification_result (VerificationResult): The result of
                verifying the extracted response.

        Returns:
            Dict[str, float]: A dictionary mapping custom reward categories
                to their values.
        """
        return {}

    @property
    def metadata(self) -> Dict[str, Any]:
        r"""Retrieve the metadata of the environment.

        This provides additional parameters and configuration details.

        Returns:
            Dict[str, Any]: A copy of the environment's metadata.
        """
        return self._metadata.copy()
|
camel/logger.py
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
|
|
15
|
+
|
|
15
16
|
import logging
|
|
16
17
|
import os
|
|
17
18
|
import sys
|
|
@@ -42,9 +43,51 @@ def _configure_library_logging():
|
|
|
42
43
|
_logger.debug("Existing logger configuration found, using that.")
|
|
43
44
|
|
|
44
45
|
|
|
46
|
+
def set_log_file(file_path):
    r"""Set a file handler for the CAMEL library logging.

    If a file handler for the same absolute path is already attached to
    the library logger, that handler is returned and no new one is added.

    Args:
        file_path (str): Path to the log file. If the directory doesn't exist,
            it will be created.

    Returns:
        logging.FileHandler: The file handler that was added to the logger
            (or the already attached handler for the same file).
    """
    target_path = os.path.abspath(file_path)

    # Check for existing handlers to the same file
    for handler in _logger.handlers:
        if (
            isinstance(handler, logging.FileHandler)
            and os.path.abspath(handler.baseFilename) == target_path
        ):
            _logger.info(f"File handler already exists for: {file_path}")
            return handler

    # Create directory if it doesn't exist. ``exist_ok=True`` avoids the
    # check-then-create race when the directory appears in between.
    log_dir = os.path.dirname(file_path)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Create file handler
    file_handler = logging.FileHandler(file_path)
    file_handler.setFormatter(
        logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
    )

    # Set the same level as the logger
    file_handler.setLevel(_logger.getEffectiveLevel())

    # Add the handler to the logger
    _logger.addHandler(file_handler)
    _logger.info(f"Log file configured at: {file_path}")

    return file_handler
|
|
85
|
+
|
|
86
|
+
|
|
45
87
|
def disable_logging():
|
|
46
88
|
r"""Disable all logging for the CAMEL library.
|
|
47
89
|
|
|
90
|
+
|
|
48
91
|
This function sets the log level to a value higher than CRITICAL,
|
|
49
92
|
effectively disabling all log messages, and adds a NullHandler to
|
|
50
93
|
suppress any potential warnings about no handlers being found.
|
|
@@ -63,6 +106,7 @@ def disable_logging():
|
|
|
63
106
|
def enable_logging():
|
|
64
107
|
r"""Enable logging for the CAMEL library.
|
|
65
108
|
|
|
109
|
+
|
|
66
110
|
This function re-enables logging if it was previously disabled,
|
|
67
111
|
and configures the library logging using the default settings.
|
|
68
112
|
If the logging is already configured,
|
|
@@ -75,12 +119,14 @@ def enable_logging():
|
|
|
75
119
|
def set_log_level(level):
|
|
76
120
|
r"""Set the logging level for the CAMEL library.
|
|
77
121
|
|
|
122
|
+
|
|
78
123
|
Args:
|
|
79
124
|
level (Union[str, int]): The logging level to set. This can be a string
|
|
80
125
|
(e.g., 'INFO') or a logging level constant (e.g., logging.INFO,
|
|
81
126
|
logging.DEBUG).
|
|
82
127
|
See https://docs.python.org/3/library/logging.html#levels
|
|
83
128
|
|
|
129
|
+
|
|
84
130
|
Raises:
|
|
85
131
|
ValueError: If the provided level is not a valid logging level.
|
|
86
132
|
"""
|
|
@@ -98,15 +144,25 @@ def set_log_level(level):
|
|
|
98
144
|
)
|
|
99
145
|
|
|
100
146
|
_logger.setLevel(level)
|
|
147
|
+
|
|
148
|
+
# Update level for all handlers
|
|
149
|
+
for handler in _logger.handlers:
|
|
150
|
+
try:
|
|
151
|
+
handler.setLevel(level)
|
|
152
|
+
except Exception as e:
|
|
153
|
+
_logger.warning(f"Failed to set level on handler {handler}: {e}")
|
|
154
|
+
|
|
101
155
|
_logger.debug(f"Logging level set to: {logging.getLevelName(level)}")
|
|
102
156
|
|
|
103
157
|
|
|
104
158
|
def get_logger(name):
|
|
105
159
|
r"""Get a logger with the specified name, prefixed with 'camel.'.
|
|
106
160
|
|
|
161
|
+
|
|
107
162
|
Args:
|
|
108
163
|
name (str): The name to be appended to 'camel.' to create the logger.
|
|
109
164
|
|
|
165
|
+
|
|
110
166
|
Returns:
|
|
111
167
|
logging.Logger: A logger instance with the name 'camel.{name}'.
|
|
112
168
|
"""
|
|
@@ -56,8 +56,10 @@ class OpenAICompatibleModel(BaseModelBackend):
|
|
|
56
56
|
url: Optional[str] = None,
|
|
57
57
|
token_counter: Optional[BaseTokenCounter] = None,
|
|
58
58
|
) -> None:
|
|
59
|
-
self.api_key = api_key or os.environ.get(
|
|
60
|
-
|
|
59
|
+
self.api_key = api_key or os.environ.get(
|
|
60
|
+
"OPENAI_COMPATIBILITY_API_KEY"
|
|
61
|
+
)
|
|
62
|
+
self.url = url or os.environ.get("OPENAI_COMPATIBILITY_API_BASE_URL")
|
|
61
63
|
super().__init__(
|
|
62
64
|
model_type, model_config_dict, api_key, url, token_counter
|
|
63
65
|
)
|