camel-ai 0.2.21__py3-none-any.whl → 0.2.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of camel-ai might be problematic.

Files changed (116)
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +41 -0
  3. camel/agents/_utils.py +188 -0
  4. camel/agents/chat_agent.py +570 -965
  5. camel/agents/knowledge_graph_agent.py +7 -1
  6. camel/agents/multi_hop_generator_agent.py +1 -1
  7. camel/configs/base_config.py +10 -13
  8. camel/configs/deepseek_config.py +4 -30
  9. camel/configs/gemini_config.py +5 -31
  10. camel/configs/openai_config.py +14 -32
  11. camel/configs/qwen_config.py +36 -36
  12. camel/datagen/self_improving_cot.py +81 -3
  13. camel/datagen/self_instruct/filter/instruction_filter.py +19 -3
  14. camel/datagen/self_instruct/self_instruct.py +53 -4
  15. camel/datasets/__init__.py +28 -0
  16. camel/datasets/base.py +969 -0
  17. camel/embeddings/openai_embedding.py +10 -1
  18. camel/environments/__init__.py +16 -0
  19. camel/environments/base.py +503 -0
  20. camel/extractors/__init__.py +16 -0
  21. camel/extractors/base.py +263 -0
  22. camel/interpreters/docker/Dockerfile +12 -0
  23. camel/interpreters/docker_interpreter.py +19 -1
  24. camel/interpreters/subprocess_interpreter.py +42 -17
  25. camel/loaders/__init__.py +2 -0
  26. camel/loaders/mineru_extractor.py +250 -0
  27. camel/memories/agent_memories.py +16 -1
  28. camel/memories/blocks/chat_history_block.py +10 -2
  29. camel/memories/blocks/vectordb_block.py +1 -0
  30. camel/memories/context_creators/score_based.py +20 -3
  31. camel/memories/records.py +10 -0
  32. camel/messages/base.py +8 -8
  33. camel/models/__init__.py +2 -0
  34. camel/models/_utils.py +57 -0
  35. camel/models/aiml_model.py +48 -17
  36. camel/models/anthropic_model.py +41 -3
  37. camel/models/azure_openai_model.py +39 -3
  38. camel/models/base_audio_model.py +92 -0
  39. camel/models/base_model.py +132 -4
  40. camel/models/cohere_model.py +88 -11
  41. camel/models/deepseek_model.py +107 -63
  42. camel/models/fish_audio_model.py +18 -8
  43. camel/models/gemini_model.py +133 -15
  44. camel/models/groq_model.py +72 -10
  45. camel/models/internlm_model.py +14 -3
  46. camel/models/litellm_model.py +9 -2
  47. camel/models/mistral_model.py +42 -5
  48. camel/models/model_manager.py +57 -3
  49. camel/models/moonshot_model.py +33 -4
  50. camel/models/nemotron_model.py +32 -3
  51. camel/models/nvidia_model.py +43 -3
  52. camel/models/ollama_model.py +139 -17
  53. camel/models/openai_audio_models.py +87 -2
  54. camel/models/openai_compatible_model.py +37 -3
  55. camel/models/openai_model.py +158 -46
  56. camel/models/qwen_model.py +61 -4
  57. camel/models/reka_model.py +53 -3
  58. camel/models/samba_model.py +209 -4
  59. camel/models/sglang_model.py +153 -14
  60. camel/models/siliconflow_model.py +16 -3
  61. camel/models/stub_model.py +46 -4
  62. camel/models/togetherai_model.py +38 -3
  63. camel/models/vllm_model.py +37 -3
  64. camel/models/yi_model.py +36 -3
  65. camel/models/zhipuai_model.py +38 -3
  66. camel/retrievers/__init__.py +3 -0
  67. camel/retrievers/hybrid_retrival.py +237 -0
  68. camel/toolkits/__init__.py +20 -3
  69. camel/toolkits/arxiv_toolkit.py +2 -1
  70. camel/toolkits/ask_news_toolkit.py +4 -2
  71. camel/toolkits/audio_analysis_toolkit.py +238 -0
  72. camel/toolkits/base.py +22 -3
  73. camel/toolkits/code_execution.py +2 -0
  74. camel/toolkits/dappier_toolkit.py +2 -1
  75. camel/toolkits/data_commons_toolkit.py +38 -12
  76. camel/toolkits/excel_toolkit.py +172 -0
  77. camel/toolkits/function_tool.py +13 -0
  78. camel/toolkits/github_toolkit.py +5 -1
  79. camel/toolkits/google_maps_toolkit.py +2 -1
  80. camel/toolkits/google_scholar_toolkit.py +2 -0
  81. camel/toolkits/human_toolkit.py +0 -3
  82. camel/toolkits/image_analysis_toolkit.py +202 -0
  83. camel/toolkits/linkedin_toolkit.py +3 -2
  84. camel/toolkits/meshy_toolkit.py +3 -2
  85. camel/toolkits/mineru_toolkit.py +178 -0
  86. camel/toolkits/networkx_toolkit.py +240 -0
  87. camel/toolkits/notion_toolkit.py +2 -0
  88. camel/toolkits/openbb_toolkit.py +3 -2
  89. camel/toolkits/page_script.js +376 -0
  90. camel/toolkits/reddit_toolkit.py +11 -3
  91. camel/toolkits/retrieval_toolkit.py +6 -1
  92. camel/toolkits/semantic_scholar_toolkit.py +2 -1
  93. camel/toolkits/stripe_toolkit.py +8 -2
  94. camel/toolkits/sympy_toolkit.py +44 -1
  95. camel/toolkits/video_analysis_toolkit.py +407 -0
  96. camel/toolkits/{video_toolkit.py → video_download_toolkit.py} +21 -25
  97. camel/toolkits/web_toolkit.py +1307 -0
  98. camel/toolkits/whatsapp_toolkit.py +3 -2
  99. camel/toolkits/zapier_toolkit.py +191 -0
  100. camel/types/__init__.py +2 -2
  101. camel/types/agents/__init__.py +16 -0
  102. camel/types/agents/tool_calling_record.py +52 -0
  103. camel/types/enums.py +3 -0
  104. camel/types/openai_types.py +16 -14
  105. camel/utils/__init__.py +2 -1
  106. camel/utils/async_func.py +2 -2
  107. camel/utils/commons.py +114 -1
  108. camel/verifiers/__init__.py +23 -0
  109. camel/verifiers/base.py +340 -0
  110. camel/verifiers/models.py +82 -0
  111. camel/verifiers/python_verifier.py +202 -0
  112. camel_ai-0.2.23.dist-info/METADATA +671 -0
  113. {camel_ai-0.2.21.dist-info → camel_ai-0.2.23.dist-info}/RECORD +127 -99
  114. {camel_ai-0.2.21.dist-info → camel_ai-0.2.23.dist-info}/WHEEL +1 -1
  115. camel_ai-0.2.21.dist-info/METADATA +0 -528
  116. {camel_ai-0.2.21.dist-info → camel_ai-0.2.23.dist-info/licenses}/LICENSE +0 -0
camel/embeddings/openai_embedding.py
@@ -30,6 +30,8 @@ class OpenAIEmbedding(BaseEmbedding[str]):
         model_type (EmbeddingModelType, optional): The model type to be
             used for text embeddings.
             (default: :obj:`TEXT_EMBEDDING_3_SMALL`)
+        url (Optional[str], optional): The url to the OpenAI service.
+            (default: :obj:`None`)
         api_key (str, optional): The API key for authenticating with the
             OpenAI service. (default: :obj:`None`)
         dimensions (int, optional): The text embedding output dimensions.
@@ -49,6 +51,7 @@ class OpenAIEmbedding(BaseEmbedding[str]):
         model_type: EmbeddingModelType = (
             EmbeddingModelType.TEXT_EMBEDDING_3_SMALL
         ),
+        url: str | None = None,
         api_key: str | None = None,
         dimensions: int | NotGiven = NOT_GIVEN,
     ) -> None:
@@ -61,7 +64,13 @@ class OpenAIEmbedding(BaseEmbedding[str]):
             assert isinstance(dimensions, int)
             self.output_dim = dimensions
         self._api_key = api_key or os.environ.get("OPENAI_API_KEY")
-        self.client = OpenAI(timeout=180, max_retries=3, api_key=self._api_key)
+        self._url = url or os.environ.get("OPENAI_API_BASE_URL")
+        self.client = OpenAI(
+            timeout=180,
+            max_retries=3,
+            base_url=self._url,
+            api_key=self._api_key,
+        )

     def embed_list(
         self,
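The hunk above threads a new `url` argument through to the underlying OpenAI client as `base_url`, falling back to the `OPENAI_API_BASE_URL` environment variable when the argument is omitted. A minimal sketch of how this might be used to point the embedder at an OpenAI-compatible endpoint; the endpoint URL is a placeholder, and the import paths assume `OpenAIEmbedding` is re-exported from `camel.embeddings`:

from camel.embeddings import OpenAIEmbedding
from camel.types import EmbeddingModelType

# Point the embedding client at a self-hosted, OpenAI-compatible server.
# Omitting `url` keeps the previous behaviour unless OPENAI_API_BASE_URL
# is set in the environment.
embedding = OpenAIEmbedding(
    model_type=EmbeddingModelType.TEXT_EMBEDDING_3_SMALL,
    url="http://localhost:8000/v1",  # placeholder endpoint
)
vectors = embedding.embed_list(["example text to embed"])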
camel/environments/__init__.py
@@ -0,0 +1,16 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+from .base import BaseEnvironment
+
+__all__ = ["BaseEnvironment"]
camel/environments/base.py
@@ -0,0 +1,503 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import asyncio
+from abc import ABC, abstractmethod
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional, Tuple
+
+from pydantic import BaseModel, Field
+
+from camel.agents import ChatAgent
+from camel.datasets.base import BaseDataset, GenerativeDataset
+from camel.extractors.base import BaseExtractor
+from camel.logger import get_logger
+from camel.verifiers.base import (
+    BaseVerifier,
+    VerificationResult,
+)
+from camel.verifiers.models import (
+    VerificationOutcome,
+    VerifierInput,
+)
+
+logger = get_logger(__name__)
+
+# TODO: Add MachineInfo into this file
+# TODO: TeacherAgent should be renamed into neural_reward_model.
+# This is where PRMs or such could be useful.
+# Should probably be its own class and not just raw ChatAgent
+
+
+class Action(BaseModel):
+    r"""Represents an action taken in an environment.
+
+    This class defines the input context, the LLM-generated output, and
+    metadata required for verification and tracking within an RL
+    framework.
+
+    Attributes:
+        problem_statement (str): The task or query given to the LLM as
+            input.
+        llm_response (str): The response generated by the LLM.
+        final_answer (Optional[str]): The reference solution, if
+            available, used for supervised learning or evaluation.
+        metadata (Dict[str, Any]): Additional metadata such as model
+            parameters, prompt details, or response confidence scores.
+        timestamp (datetime): The timestamp when the action was
+            generated (UTC).
+    """
+
+    problem_statement: str = Field(description="Problem statement for the LLM")
+    llm_response: str = Field(description="Generated response from the LLM")
+    final_answer: Optional[str] = Field(
+        None, description="Reference solution if available"
+    )
+    metadata: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Additional metadata about the generation",
+    )
+    timestamp: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        description="When the response was generated (UTC)",
+    )
+
+
+class Observation(BaseModel):
+    r"""Environment observation.
+
+    Attributes:
+        question: The question posed to the LLM.
+        context: Additional context for the question.
+        metadata: Optional metadata about the observation.
+    """
+
+    question: str = Field(..., description="The question posed to the LLM")
+    context: Dict[str, Any] = Field(
+        default_factory=dict, description="Additional context for the question"
+    )
+    metadata: Optional[Dict[str, Any]] = Field(
+        default=None, description="Optional metadata about the observation"
+    )
+
+
+class StepResult(BaseModel):
+    r"""Result of an environment step.
+
+    Attributes:
+        observation: The next observation.
+        reward: Dictionary of reward scores for different aspects.
+        done: Whether the episode is complete.
+        info: Additional information about the step.
+    """
+
+    observation: Observation = Field(..., description="The next observation")
+    reward: float = Field(..., description="Total reward of the action")
+    rewards_dict: Dict[str, float] = Field(
+        default_factory=dict,
+        description="Dictionary of reward scores for different aspects",
+    )
+    done: bool = Field(..., description="Whether the episode is complete")
+    info: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Additional information about the step",
+    )
+
+
+class BaseEnvironment(ABC):
+    r"""Base class for all RLVR training environments.
+
+    An environment ties everything together. It:
+    1. Holds state and manages curriculum progression
+    2. Defines reward functions and hint generation
+    3. Manages dataset and task selection
+    4. Provides reset and step functions
+    5. Handles verifier setup and teardown
+    6. Enables proactive agent behavior
+    7. Supports practice environment creation
+    8. Facilitates chain-of-thought verification
+
+    Key Features:
+    - Curriculum learning with adaptive difficulty
+    - Reward shaping based on solution quality
+    - Hint generation from verified solutions
+    - Task selection based on agent progress
+    - Practice environment generation
+    - Chain-of-thought validation
+    """
+
+    def __init__(
+        self,
+        dataset: BaseDataset,
+        verifier: BaseVerifier,
+        extractor: BaseExtractor,
+        max_steps: Optional[int] = None,
+        teacher_agent: Optional[ChatAgent] = None,
+        curriculum_config: Optional[Dict[str, Any]] = None,
+        practice_env_config: Optional[Dict[str, Any]] = None,
+        **kwargs,
+    ) -> None:
+        r"""Initialize the environment.
+
+        Args:
+            dataset: Dataset to sample questions from.
+            verifier: Verifier to check responses.
+            extractor: Extractor to process LLM responses.
+            max_steps: Maximum steps per episode.
+            teacher_agent: Optional agent for reward shaping and hints
+            curriculum_config: Configuration for curriculum learning including:
+                - difficulty_levels: List of available difficulty levels
+                - promotion_threshold: Score needed to advance
+                - demotion_threshold: Score triggering level decrease
+                - min_questions_per_level: Questions before promotion
+            practice_env_config: Configuration for practice environments:
+                - max_practice_envs: Maximum concurrent environments
+                - difficulty_range: Allowed difficulty variation
+                - focus_areas: Specific skills to practice
+            **kwargs: Additional environment parameters.
+        """
+        self.dataset = dataset
+        self.verifier = verifier
+        self.extractor = extractor
+        self.max_steps = max_steps
+        self.teacher_agent = teacher_agent
+        self._metadata = kwargs
+
+        # State tracking
+        self._is_setup: bool = False
+        self._current_step: int = 0
+        self._episode_ended: bool = False
+        self._state: Dict[str, Any] = self._get_initial_state()
+        self._last_observation: Optional[Observation] = None
+        self._episode_history: List[Tuple[Observation, Action]] = []
+
+    @abstractmethod
+    async def setup(self) -> None:
+        r"""Set up the environment, including verifier initialization."""
+        if self._is_setup:
+            return
+
+        try:
+            # Initialize core components
+            if hasattr(self.verifier, 'setup'):
+                await self.verifier.setup()
+            if hasattr(self.dataset, 'setup'):
+                await self.dataset.setup()
+            if hasattr(self.extractor, 'setup'):
+                await self.extractor.setup()
+
+            # initialize agents if present
+            if self.teacher_agent:
+                await self.teacher_agent.reset()
+
+            self._is_setup = True
+            logger.info('Environment setup completed successfully')
+        except Exception as e:
+            logger.error(f'Failed to setup environment: {e}')
+            raise
+
+    @abstractmethod
+    async def teardown(self) -> None:
+        r"""Clean up resources, including verifier teardown."""
+        if not self._is_setup:
+            return
+
+        try:
+            # Cleanup components
+            if hasattr(self.verifier, 'cleanup'):
+                await self.verifier.cleanup()
+            if hasattr(self.dataset, 'cleanup'):
+                await self.dataset.cleanup()
+            if hasattr(self.extractor, 'cleanup'):
+                await self.extractor.cleanup()
+
+            self._is_setup = False
+            logger.info('Environment teardown completed successfully')
+        except Exception as e:
+            logger.error(f'Failed to teardown environment: {e}')
+            raise
+
+    @abstractmethod
+    async def reset(self) -> Observation:
+        r"""Reset the environment to initial state.
+
+        Returns:
+            Initial observation for the episode
+        """
+
+        if not self._is_setup:
+            await self.setup()
+
+        # Reset state
+        self._current_step = 0
+        self._episode_ended = False
+        self._episode_history = []
+        self._state = self._get_initial_state()
+
+        # Get initial observation
+        observation = self._get_next_observation()
+        if observation is None:
+            raise RuntimeError("Failed to get initial observation")
+
+        self._last_observation = observation
+
+        return observation
+
+    @abstractmethod
+    async def step(self, action: Action) -> StepResult:
+        r"""Take a step in the environment.
+
+        Args:
+            action: Action containing everything that is needed
+                to progress in the environment
+
+        Returns:
+            StepResult containing next observation, reward, done flag, and info
+        """
+        if self.max_steps and self._current_step >= self.max_steps:
+            return StepResult(
+                observation=self._get_terminal_observation(),
+                reward=0,
+                rewards_dict={},
+                done=True,
+                info={"reason": "max_steps_reached"},
+            )
+
+        if not self._is_setup:
+            raise RuntimeError("Environment not set up. Call setup() first.")
+        if self._episode_ended:
+            raise RuntimeError("Episode has ended. Call reset() first.")
+        if self._last_observation is None:
+            raise RuntimeError("No current observation. Call reset() first.")
+
+        self._current_step += 1
+
+        current_obs: Observation = self._last_observation
+        self._episode_history.append((current_obs, action))
+
+        # extract verifiable part from llm response
+        extraction_result = await self.extractor.extract(action.llm_response)
+
+        # TODO: extract executable llm response specifically
+
+        # verify the extracted
+        verification_result = await self.verifier.verify(
+            VerifierInput(
+                llm_response=extraction_result,
+                ground_truth=action.final_answer,
+            )
+        )
+
+        # compute rewards
+        total_reward, rewards_dict = await self.compute_reward(
+            action, extraction_result, verification_result
+        )
+
+        # check termination
+        done = self._is_done()
+
+        next_obs = (
+            self._get_terminal_observation()
+            if done
+            else self._get_next_observation()
+        )
+
+        self._last_observation = next_obs
+        self._episode_ended = done
+
+        return StepResult(
+            observation=next_obs,
+            reward=total_reward,
+            rewards_dict=rewards_dict,
+            done=done,
+            info={
+                "extraction_result": extraction_result,
+                "verification_result": verification_result,
+                "step": self._current_step,
+                "state": self._state,
+            },
+        )
+
+    @abstractmethod
+    def _get_initial_state(self) -> Dict[str, Any]:
+        r"""Get initial environment state."""
+
+        return {
+            "current_datapoint": None,
+            "attempts": 0,
+            "success_rate": 0.0,
+            "rewards": [],
+            "termination_reason": None,
+        }
+
+    @abstractmethod
+    def _get_next_observation(self) -> Observation:
+        r"""Get the next observation for the environment.
+
+        Returns:
+            Observation for the next step
+        """
+        if not self.dataset or len(self.dataset) == 0:
+            logger.warning(
+                "Dataset is empty. Attempting to generate new data..."
+            )
+            if isinstance(self.dataset, GenerativeDataset):
+                try:
+                    asyncio.run(
+                        self.dataset.generate_new(1)
+                    )  # Generate at least one datapoint
+                    logger.info("Generated new datapoint successfully.")
+                except Exception as e:
+                    logger.error(f"Failed to generate new data: {e}")
+                    return self._get_terminal_observation()
+            else:
+                logger.error("Dataset is empty and not a GenerativeDataset.")
+                return self._get_terminal_observation()
+
+        try:
+            # Ensure dataset is not empty after generation attempt
+            if len(self.dataset) == 0:
+                logger.error("Dataset is still empty after generation.")
+                return self._get_terminal_observation()
+
+            # Sample the next datapoint
+            datapoint_idx = self._current_step % len(self.dataset)
+            datapoint = self.dataset[datapoint_idx]
+
+            if not datapoint:
+                logger.error(f"Invalid datapoint at index {datapoint_idx}")
+                return self._get_terminal_observation()
+
+            self._state["current_datapoint"] = datapoint
+
+            # Extract necessary attributes safely
+            question = getattr(datapoint, "question", None)
+            final_answer = getattr(datapoint, "final_answer", None)
+            rationale = getattr(datapoint, "rationale", None)
+            difficulty = getattr(datapoint, "difficulty", None)
+            metadata = getattr(datapoint, "metadata", {})
+
+            if not question or not final_answer:
+                logger.error(
+                    f"Datapoint at index {datapoint_idx} "
+                    "is missing required fields."
+                )
+                return self._get_terminal_observation()
+
+            observation = Observation(
+                question=question,
+                context={
+                    "final_answer": final_answer,
+                    "difficulty": difficulty,
+                    "rationale": rationale,
+                },
+                metadata={
+                    "step": self._current_step,
+                    "datapoint_id": str(datapoint_idx),
+                    "verified": metadata.get("verified", False),
+                    **metadata,
+                },
+            )
+
+            logger.debug(
+                f"Generated observation for step {self._current_step}"
+            )
+            return observation
+
+        except (IndexError, AttributeError) as e:
+            logger.error(f"Error getting next observation: {e}")
+            return self._get_terminal_observation()
+        except Exception as e:
+            logger.error(f"Unexpected error getting next observation: {e}")
+            return self._get_terminal_observation()
+
+    @abstractmethod
+    def _get_terminal_observation(self) -> Observation:
+        r"""Get the terminal observation when episode ends.
+
+        Returns:
+            Terminal observation
+        """
+        return Observation(
+            question="Episode completed",
+            context={},
+            metadata={"terminal": True, "final_step": self._current_step},
+        )
+
+    @abstractmethod
+    async def compute_reward(
+        self,
+        action: Action,
+        extraction_result: str,
+        verification_result: VerificationResult,
+    ) -> Tuple[float, Dict[str, float]]:
+        r"""Compute reward scores for different aspects of the response.
+
+        Args:
+            response: The response.
+            extraction_result: Extracted information from response
+            verification_result: Result from the verifier.
+
+        Returns:
+            - Total reward
+            - Dictionary of reward scores for different aspects.
+        """
+        rewards: Dict[str, float] = {}
+
+        # Get success from verification result status
+        verification_success = float(
+            verification_result.status == VerificationOutcome.SUCCESS
+        )
+        rewards["correctness"] = 1.0 if verification_success > 0.5 else 0.0
+
+        # Update state
+        self._state["rewards"].append(rewards)
+        total_attempts = self._state["attempts"] + 1
+        self._state["success_rate"] = (
+            self._state["success_rate"] * (total_attempts - 1)
+            + verification_success
+        ) / total_attempts
+
+        further_rewards = await self._compute_reward(
+            action, extraction_result, verification_result
+        )
+
+        rewards = rewards | further_rewards
+
+        return sum(rewards.values()), rewards
+
+    @abstractmethod
+    async def _compute_reward(
+        self,
+        action: Action,
+        extraction_result: str,
+        verification_result: VerificationResult,
+    ) -> Dict[str, float]:
+        pass
+
+    def _is_done(self) -> bool:
+        r"""Check if episode should terminate."""
+        if self.max_steps and self._current_step >= self.max_steps:
+            return True
+        return False
+
+    @property
+    def metadata(self) -> Dict[str, Any]:
+        r"""Get environment metadata."""
+        return self._metadata.copy()
+
+    @property
+    def current_step(self) -> int:
+        r"""Get current step number."""
+        return self._current_step
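BaseEnvironment marks setup, teardown, reset, step, the observation helpers, and both reward hooks as abstract while still giving them default bodies. A minimal sketch of a concrete subclass, assuming those bodies are meant to be reused via super(); the class name MinimalEnvironment and the empty extra-reward policy are invented for illustration:

from typing import Any, Dict, Tuple

from camel.environments.base import (
    Action,
    BaseEnvironment,
    Observation,
    StepResult,
)


class MinimalEnvironment(BaseEnvironment):
    """Hypothetical subclass that reuses the default base-class behaviour."""

    async def setup(self) -> None:
        await super().setup()

    async def teardown(self) -> None:
        await super().teardown()

    async def reset(self) -> Observation:
        return await super().reset()

    async def step(self, action: Action) -> StepResult:
        return await super().step(action)

    def _get_initial_state(self) -> Dict[str, Any]:
        return super()._get_initial_state()

    def _get_next_observation(self) -> Observation:
        return super()._get_next_observation()

    def _get_terminal_observation(self) -> Observation:
        return super()._get_terminal_observation()

    async def compute_reward(
        self, action, extraction_result, verification_result
    ) -> Tuple[float, Dict[str, float]]:
        return await super().compute_reward(
            action, extraction_result, verification_result
        )

    async def _compute_reward(
        self, action, extraction_result, verification_result
    ) -> Dict[str, float]:
        # No reward terms beyond the base "correctness" score.
        return {}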
camel/extractors/__init__.py
@@ -0,0 +1,16 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+from .base import BaseExtractor
+
+__all__ = ["BaseExtractor"]