camel-ai 0.2.29__py3-none-any.whl → 0.2.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

@@ -0,0 +1,271 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+ from abc import ABC, abstractmethod
+ from typing import Any, Dict, List, Optional, Tuple
+
+ from camel.extractors.base import BaseExtractor
+ from camel.logger import get_logger
+
+ from .models import Action, Observation, StepResult
+
+ logger = get_logger(__name__)
+
+
+ class MultiStepEnv(ABC):
+     r"""A multi-step environment for reinforcement learning with LLMs."""
+
+     def __init__(
+         self,
+         extractor: BaseExtractor,
+         max_steps: Optional[int] = None,
+         **kwargs,
+     ) -> None:
+         r"""Initialize the environment.
+
+         Args:
+             extractor: Extractor to process LLM responses.
+             max_steps: Maximum steps per episode.
+             **kwargs: Additional environment parameters.
+         """
+         self.extractor = extractor
+         self.max_steps = max_steps
+         self._metadata = kwargs
+
+         # State tracking
+         self._is_setup: bool = False
+         self._current_step: int = 0
+         self._episode_ended: bool = False
+         self._state: Dict[str, Any] = self._get_initial_state()
+         self._last_observation: Optional[Observation] = None
+         self._episode_history: List[Tuple[Observation, Action]] = []
+
+     async def setup(self) -> None:
+         r"""Set up the environment by initializing the extractor.
+
+         This method ensures that the environment is ready for interaction.
+         It sets up the necessary components, including the extractor.
+
+         Raises:
+             Exception: If setup fails due to an internal error.
+         """
+
+         if self._is_setup:
+             return
+
+         try:
+             await self.extractor.setup()
+             await self._setup()
+             self._is_setup = True
+             logger.info('Environment setup completed successfully')
+         except Exception as e:
+             logger.error(f'Failed to setup environment: {e}')
+             raise
+
+     @abstractmethod
+     async def _setup(self) -> None:
+         pass
+
+     async def close(self) -> None:
+         r"""Clean up and close all resources used by the environment.
+         This method cleans up the extractor, calls the internal
+         ``_close()`` hook implemented by the subclass,
+         and ensures that the environment is properly closed.
+
+         Raises:
+             Exception: If an error occurs while closing the environment.
+         """
+         if not self._is_setup:
+             return
+
+         try:
+             await self.extractor.cleanup()
+
+             await self._close()
+
+             self._is_setup = False
+             logger.info('Environment teardown completed successfully')
+         except Exception as e:
+             logger.error(f'Failed to teardown environment: {e}')
+             raise
+
+     @abstractmethod
+     async def _close(self) -> None:
+         pass
+
+     async def reset(self) -> Observation:
+         r"""Reset the environment to an initial state.
+
+         Returns:
+             Observation: The initial observation for the episode.
+
+         Raises:
+             RuntimeError: If we fail to get the initial observation.
+         """
+
+         if not self._is_setup:
+             await self.setup()
+
+         # Reset state
+         self._current_step = 0
+         self._episode_ended = False
+         self._episode_history = []
+         self._state = self._get_initial_state()
+
+         # Get initial observation
+         observation = self._get_next_observation()
+         if observation is None:
+             raise RuntimeError("Failed to get initial observation")
+
+         self._last_observation = observation
+
+         return observation
+
+     async def step(self, action: Action) -> StepResult:
+         r"""Take a step in the environment using the given action.
+
+         This method updates the environment state based on the LLM's response,
+         computes rewards, checks if the episode is done, and based on that
+         gets the next or final observation.
+
+         Args:
+             action (Action): The action containing the LLM response.
+
+         Returns:
+             StepResult containing next observation, total reward, a dictionary
+             of rewards, done flag, and info.
+
+         Raises:
+             RuntimeError: If the environment is not set up, the episode has
+                 ended, or there is no valid current observation.
+         """
+         if self.max_steps and self._current_step >= self.max_steps:
+             return StepResult(
+                 observation=self._get_terminal_observation(),
+                 reward=0,
+                 rewards_dict={},
+                 done=True,
+                 info={"reason": "max_steps_reached"},
+             )
+
+         if not self._is_setup:
+             raise RuntimeError("Environment not set up. Call setup() first.")
+         if self._episode_ended:
+             raise RuntimeError("Episode has ended. Call reset() first.")
+         if self._last_observation is None:
+             raise RuntimeError("No current observation. Call reset() first.")
+
+         self._current_step += 1
+
+         current_obs: Observation = self._last_observation
+         self._episode_history.append((current_obs, action))
+
+         # Update the environment state based on the action
+         await self._update_state(action)
+
+         # Compute rewards
+         total_reward, rewards_dict = await self.compute_reward()
+
+         # Check termination
+         done = self.is_done()
+
+         # Get next observation based on the updated state
+         next_obs = (
+             self._get_terminal_observation()
+             if done
+             else self._get_next_observation()
+         )
+
+         self._last_observation = next_obs
+         self._episode_ended = done
+
+         return StepResult(
+             observation=next_obs,
+             reward=total_reward,
+             rewards_dict=rewards_dict,
+             done=done,
+             info={
+                 "extraction_result": await self.extractor.extract(
+                     action.llm_response
+                 ),
+                 "step": self._current_step,
+                 "state": self._state,  # Updated state
+             },
+         )
+
+     @abstractmethod
+     def _get_initial_state(self) -> Dict[str, Any]:
+         pass
+
+     @abstractmethod
+     async def _update_state(self, action: Action) -> None:
+         pass
+
+     @abstractmethod
+     def _get_next_observation(self) -> Observation:
+         pass
+
+     @abstractmethod
+     def _get_terminal_observation(self) -> Observation:
+         pass
+
+     @abstractmethod
+     async def compute_reward(
+         self,
+     ) -> Tuple[float, Dict[str, float]]:
+         pass
+
+     def is_done(self) -> bool:
+         r"""Check if the episode should terminate.
+
+         The episode terminates once the maximum number of steps is
+         reached or once any subclass-specific criterion is met.
+
+         Returns:
+             bool: True if the episode should terminate, False otherwise.
+         """
+
+         # After too many steps
+         if self.max_steps and self._current_step >= self.max_steps:
+             return True
+
+         # Further termination logic can be implemented in subclass
+         if self._is_done():
+             return True
+
+         return False
+
+     @abstractmethod
+     def _is_done(self) -> bool:
+         pass
+
+     @property
+     def metadata(self) -> Dict[str, Any]:
+         r"""Retrieve the metadata of the environment.
+
+         This provides additional parameters and configuration details.
+
+         Returns:
+             Dict[str, Any]: A copy of the environment's metadata.
+         """
+         return self._metadata.copy()
+
+     @property
+     def current_step(self) -> int:
+         r"""Get the current step number.
+
+         Returns:
+             int: The index of the current step in the episode.
+         """
+         return self._current_step
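
For orientation, here is a minimal sketch of a concrete subclass. The `CountingEnv` name, its state keys, and its reward values are illustrative only; the overridden hooks and the `Action`/`Observation` types come from the module above:

```python
# Hypothetical MultiStepEnv subclass; names, state keys, and reward values
# are illustrative, not part of camel-ai.
from typing import Any, Dict, Tuple


class CountingEnv(MultiStepEnv):
    r"""Toy environment: the LLM must reply with the current target number."""

    async def _setup(self) -> None:
        pass  # no external resources to acquire

    async def _close(self) -> None:
        pass  # nothing to release

    def _get_initial_state(self) -> Dict[str, Any]:
        return {"target": 0, "correct": False}

    async def _update_state(self, action: Action) -> None:
        # Compare the extracted response against the current target.
        extracted = await self.extractor.extract(action.llm_response)
        self._state["correct"] = extracted == str(self._state["target"])
        self._state["target"] += 1

    def _get_next_observation(self) -> Observation:
        return Observation(question=f"Reply with: {self._state['target']}")

    def _get_terminal_observation(self) -> Observation:
        return Observation(question="Episode over.")

    async def compute_reward(self) -> Tuple[float, Dict[str, float]]:
        reward = 1.0 if self._state["correct"] else 0.0
        return reward, {"correctness": reward}

    def _is_done(self) -> bool:
        return False  # terminate via max_steps only
```

A rollout then follows the contract above: `await env.reset()` yields the first `Observation`, repeated `await env.step(action)` calls advance the episode until `StepResult.done` is true, and `await env.close()` releases resources.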
@@ -0,0 +1,293 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+
+ from abc import ABC, abstractmethod
+ from typing import Any, Dict, Optional, Tuple, Union
+
+ from camel.datasets import BaseGenerator, DataPoint, StaticDataset
+ from camel.extractors.base import BaseExtractor
+ from camel.logger import get_logger
+ from camel.verifiers.base import (
+     BaseVerifier,
+     VerificationResult,
+ )
+ from camel.verifiers.models import (
+     VerifierInput,
+ )
+
+ from .models import Action, Observation, StepResult
+
+ logger = get_logger(__name__)
+
+
+ class SingleStepEnv(ABC):
+     r"""A single-step environment for reinforcement learning with LLMs.
+
+     Key Features:
+     - Samples questions from a dataset and poses them to the LLM.
+     - Extracts verifiable information from model responses.
+     - Verifies extracted responses against ground truth.
+     - Computes and assigns rewards based on correctness.
+     - Supports async setup, teardown, and cleanup of resources.
+
+     This class is intended as a foundation for RL experiments involving
+     LLM-based policies, ensuring structured interactions between model
+     actions and verification mechanisms.
+     """
+
+     PLACEHOLDER_OBS = Observation(
+         question="Episode ended. This is just a placeholder."
+     )
+
+     ACCURACY_REWARD = 10
+
+     def __init__(
+         self,
+         dataset: Union[StaticDataset, BaseGenerator],
+         verifier: BaseVerifier,
+         extractor: BaseExtractor,
+         **kwargs,
+     ) -> None:
+         r"""Initialize the environment.
+
+         Args:
+             dataset: Dataset to sample questions from.
+             verifier: Verifier to check responses.
+             extractor: Extractor to process LLM responses.
+             **kwargs: Additional environment parameters.
+         """
+         self.dataset = dataset
+         self.verifier = verifier
+         self.extractor = extractor
+         self._metadata = kwargs
+
+         # State tracking
+         self._is_setup: bool = False
+         self._state: Optional[DataPoint] = None
+         self._episode_ended: bool = False
+
+     async def setup(self) -> None:
+         r"""Set up the environment by initializing the verifier and extractor.
+
+         This method ensures that the environment is ready for interaction.
+         It sets up necessary components, including the verifier and extractor.
+
+         Raises:
+             Exception: If setup fails due to an internal error.
+         """
+
+         if self._is_setup:
+             return
+
+         try:
+             await self.verifier.setup()
+             await self.extractor.setup()
+
+             self._is_setup = True
+             logger.info('Environment setup completed successfully')
+         except Exception as e:
+             logger.error(f'Failed to setup environment: {e}')
+             raise
+
+     async def close(self) -> None:
+         r"""Clean up and close all resources used by the environment.
+
+         This method shuts down the verifier and extractor, resets the internal
+         state, and ensures that the environment is properly closed.
+
+         Raises:
+             Exception: If an error occurs while closing the environment.
+         """
+
+         if not self._is_setup:
+             return
+
+         try:
+             self._is_setup = False
+             await self.verifier.cleanup()
+             await self.extractor.cleanup()
+             self._state = None
+             self._episode_ended = False
+             logger.info('Environment closed successfully')
+         except Exception as e:
+             logger.error(f'Failed to close environment: {e}')
+             raise
+
+     async def reset(self) -> Observation:
+         r"""Reset the environment and start a new episode.
+
+         This method samples a new data point from the dataset and returns the
+         initial observation.
+
+         Returns:
+             Observation: The first observation of the new episode, including
+                 the question.
+
+         Raises:
+             Exception: If the environment is not set up properly.
+         """
+
+         if not self._is_setup:
+             await self.setup()
+
+         self._episode_ended = False
+
+         # Sample a datapoint
+
+         self._state = self.dataset.sample()
+
+         observation = Observation(
+             question=self._state.question, context={}, metadata={}
+         )
+
+         return observation
+
+     async def step(self, action: Action) -> StepResult:
+         r"""Take a step in the environment using the given action.
+
+         This method processes the LLM response, extracts verifiable content,
+         verifies correctness, computes rewards, and ends the episode.
+
+         Args:
+             action (Action): The action containing the LLM response to
+                 evaluate.
+
+         Returns:
+             StepResult: Contains the next observation (placeholder), total
+                 reward, reward breakdown, completion flag, and additional
+                 information.
+
+         Raises:
+             RuntimeError: If the environment is not set up, the episode has
+                 ended, or there is no valid current observation.
+         """
+
+         if not self._is_setup:
+             raise RuntimeError("Environment not set up. Call setup() first.")
+         if self._episode_ended:
+             raise RuntimeError("Episode has ended. Call reset() first.")
+         if self._state is None:
+             raise RuntimeError("No current observation. Call reset() first.")
+
+         # extract verifiable part from llm response
+         extraction_result = await self.extractor.extract(action.llm_response)
+
+         if not extraction_result:
+             raise RuntimeError(f"Couldn't extract from {action.llm_response}")
+
+         # verify the extracted
+         verification_result = await self.verifier.verify(
+             VerifierInput(
+                 llm_response=extraction_result,
+                 ground_truth=self._state.final_answer,
+             )
+         )
+
+         # compute rewards
+         total_reward, rewards_dict = await self._compute_reward(
+             action, extraction_result, verification_result
+         )
+
+         self._episode_ended = True
+
+         return StepResult(
+             observation=self.PLACEHOLDER_OBS,
+             reward=total_reward,
+             rewards_dict=rewards_dict,
+             done=True,
+             info={
+                 "extraction_result": extraction_result,
+                 "verification_result": verification_result,
+                 "state": self._state,
+             },
+         )
+
+     async def _compute_reward(
+         self,
+         action: Action,
+         extraction_result: str,
+         verification_result: VerificationResult,
+     ) -> Tuple[float, Dict[str, float]]:
+         r"""Compute reward scores based on verification results.
+
+         This method calculates the reward based on correctness and any
+         additional custom reward components.
+
+         Args:
+             action (Action): The action taken in the environment.
+             extraction_result (str): The extracted verifiable content from the
+                 LLM response.
+             verification_result (VerificationResult): The result of verifying
+                 the extracted response.
+
+         Returns:
+             Tuple[float, Dict[str, float]]: A tuple containing:
+                 - Total reward (float)
+                 - Dictionary of individual reward components.
+
+         Raises:
+             Exception: If an error occurs while computing rewards.
+         """
+
+         rewards: Dict[str, float] = {}
+
+         rewards["correctness"] = (
+             self.ACCURACY_REWARD if verification_result.status else 0.0
+         )
+
+         further_rewards = await self._compute_custom_reward(
+             action, extraction_result, verification_result
+         )
+
+         rewards = rewards | further_rewards
+
+         return sum(rewards.values()), rewards
+
+     @abstractmethod
+     async def _compute_custom_reward(
+         self,
+         action: Action,
+         extraction_result: str,
+         verification_result: VerificationResult,
+     ) -> Dict[str, float]:
+         r"""Compute additional custom reward components.
+
+         This method should be implemented by subclasses to define
+         domain-specific reward calculations.
+
+         Args:
+             action (Action): The action taken in the environment.
+             extraction_result (str): The extracted verifiable content from the
+                 LLM response.
+             verification_result (VerificationResult): The result of verifying
+                 the extracted response.
+
+         Returns:
+             Dict[str, float]: A dictionary mapping custom reward categories
+                 to their values.
+         """
+         pass
+
+     @property
+     def metadata(self) -> Dict[str, Any]:
+         r"""Retrieve the metadata of the environment.
+
+         This provides additional parameters and configuration details.
+
+         Returns:
+             Dict[str, Any]: A copy of the environment's metadata.
+         """
+
+         return self._metadata.copy()
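
As a usage illustration, a single-step episode is exactly one `reset`/`step` pair. The sketch below elides construction of the dataset, verifier, extractor, and agent, and assumes `Action` accepts `llm_response` as a constructor field (the attribute the code above reads):

```python
# Hypothetical single-step rollout; env and agent construction are elided.
async def rollout(env: SingleStepEnv, agent) -> float:
    obs = await env.reset()  # samples a DataPoint and wraps its question
    response = agent.answer(obs.question)  # hypothetical LLM call
    result = await env.step(Action(llm_response=response))
    assert result.done  # a single-step episode always ends after step()
    await env.close()
    return result.reward  # correctness reward plus any custom components
```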
camel/logger.py CHANGED
@@ -12,6 +12,7 @@
  # limitations under the License.
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 
+
  import logging
  import os
  import sys
@@ -42,9 +43,51 @@ def _configure_library_logging():
          _logger.debug("Existing logger configuration found, using that.")
 
 
+ def set_log_file(file_path):
+     r"""Set a file handler for the CAMEL library logging.
+
+     Args:
+         file_path (str): Path to the log file. If the directory doesn't exist,
+             it will be created.
+
+     Returns:
+         logging.FileHandler: The file handler that was added to the logger.
+     """
+     # Check for existing handlers to the same file
+     for handler in _logger.handlers:
+         if isinstance(handler, logging.FileHandler) and os.path.abspath(
+             handler.baseFilename
+         ) == os.path.abspath(file_path):
+             _logger.info(f"File handler already exists for: {file_path}")
+             return handler
+
+     # Create directory if it doesn't exist
+     log_dir = os.path.dirname(file_path)
+     if log_dir and not os.path.exists(log_dir):
+         os.makedirs(log_dir)
+
+     # Create file handler
+     file_handler = logging.FileHandler(file_path)
+     file_handler.setFormatter(
+         logging.Formatter(
+             '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+         )
+     )
+
+     # Set the same level as the logger
+     file_handler.setLevel(_logger.getEffectiveLevel())
+
+     # Add the handler to the logger
+     _logger.addHandler(file_handler)
+     _logger.info(f"Log file configured at: {file_path}")
+
+     return file_handler
+
+
  def disable_logging():
      r"""Disable all logging for the CAMEL library.
 
+
      This function sets the log level to a value higher than CRITICAL,
      effectively disabling all log messages, and adds a NullHandler to
      suppress any potential warnings about no handlers being found.
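
A quick usage sketch of the new `set_log_file` helper (the path is illustrative):

```python
from camel.logger import get_logger, set_log_file

# Attach a file handler to the library logger; "logs/" is created if
# missing, and a second call with the same path reuses the handler.
handler = set_log_file("logs/camel.log")

logger = get_logger("demo")  # child logger "camel.demo" propagates upward
logger.info("This record also lands in logs/camel.log")
```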
@@ -63,6 +106,7 @@ def disable_logging():
  def enable_logging():
      r"""Enable logging for the CAMEL library.
 
+
      This function re-enables logging if it was previously disabled,
      and configures the library logging using the default settings.
      If the logging is already configured,
@@ -75,12 +119,14 @@ def enable_logging():
  def set_log_level(level):
      r"""Set the logging level for the CAMEL library.
 
+
      Args:
          level (Union[str, int]): The logging level to set. This can be a string
              (e.g., 'INFO') or a logging level constant (e.g., logging.INFO,
              logging.DEBUG).
          See https://docs.python.org/3/library/logging.html#levels
 
+
      Raises:
          ValueError: If the provided level is not a valid logging level.
      """
@@ -98,15 +144,25 @@ def set_log_level(level):
          )
 
      _logger.setLevel(level)
+
+     # Update level for all handlers
+     for handler in _logger.handlers:
+         try:
+             handler.setLevel(level)
+         except Exception as e:
+             _logger.warning(f"Failed to set level on handler {handler}: {e}")
+
      _logger.debug(f"Logging level set to: {logging.getLevelName(level)}")
 
 
  def get_logger(name):
      r"""Get a logger with the specified name, prefixed with 'camel.'.
 
+
      Args:
          name (str): The name to be appended to 'camel.' to create the logger.
 
+
      Returns:
          logging.Logger: A logger instance with the name 'camel.{name}'.
      """
@@ -56,8 +56,10 @@ class OpenAICompatibleModel(BaseModelBackend):
          url: Optional[str] = None,
          token_counter: Optional[BaseTokenCounter] = None,
      ) -> None:
-         self.api_key = api_key or os.environ.get("OPENAI_COMPATIBILIY_API_KEY")
-         self.url = url or os.environ.get("OPENAI_COMPATIBILIY_API_BASE_URL")
+         self.api_key = api_key or os.environ.get(
+             "OPENAI_COMPATIBILITY_API_KEY"
+         )
+         self.url = url or os.environ.get("OPENAI_COMPATIBILITY_API_BASE_URL")
          super().__init__(
              model_type, model_config_dict, api_key, url, token_counter
          )
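
The final hunk fixes a typo in the environment variable names read by `OpenAICompatibleModel` ("COMPATIBILIY" → "COMPATIBILITY"). Code that exported the misspelled names against 0.2.29 needs to switch to the corrected ones; the values below are placeholders:

```python
import os

# camel-ai 0.2.30 reads the corrected names; the misspelled ones are ignored.
os.environ["OPENAI_COMPATIBILITY_API_KEY"] = "sk-placeholder"
os.environ["OPENAI_COMPATIBILITY_API_BASE_URL"] = "http://localhost:8000/v1"
```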