camel-ai 0.2.36__py3-none-any.whl → 0.2.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic.
- camel/__init__.py +1 -1
- camel/agents/__init__.py +2 -0
- camel/agents/repo_agent.py +579 -0
- camel/configs/aiml_config.py +20 -19
- camel/configs/anthropic_config.py +25 -27
- camel/configs/cohere_config.py +11 -10
- camel/configs/deepseek_config.py +16 -16
- camel/configs/gemini_config.py +8 -8
- camel/configs/groq_config.py +18 -19
- camel/configs/internlm_config.py +8 -8
- camel/configs/litellm_config.py +26 -24
- camel/configs/mistral_config.py +8 -8
- camel/configs/moonshot_config.py +11 -11
- camel/configs/nvidia_config.py +13 -13
- camel/configs/ollama_config.py +14 -15
- camel/configs/openai_config.py +3 -3
- camel/configs/openrouter_config.py +9 -9
- camel/configs/qwen_config.py +8 -8
- camel/configs/reka_config.py +12 -11
- camel/configs/samba_config.py +14 -14
- camel/configs/sglang_config.py +15 -16
- camel/configs/siliconflow_config.py +18 -17
- camel/configs/togetherai_config.py +18 -19
- camel/configs/vllm_config.py +18 -19
- camel/configs/yi_config.py +7 -8
- camel/configs/zhipuai_config.py +8 -9
- camel/datagen/evol_instruct/__init__.py +20 -0
- camel/datagen/evol_instruct/evol_instruct.py +424 -0
- camel/datagen/evol_instruct/scorer.py +166 -0
- camel/datagen/evol_instruct/templates.py +268 -0
- camel/datasets/static_dataset.py +25 -23
- camel/environments/models.py +10 -1
- camel/environments/single_step.py +296 -136
- camel/extractors/__init__.py +16 -1
- camel/interpreters/docker_interpreter.py +1 -1
- camel/interpreters/e2b_interpreter.py +1 -1
- camel/interpreters/subprocess_interpreter.py +1 -1
- camel/loaders/__init__.py +2 -2
- camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
- camel/memories/context_creators/score_based.py +198 -67
- camel/models/aiml_model.py +9 -3
- camel/models/anthropic_model.py +11 -3
- camel/models/azure_openai_model.py +9 -3
- camel/models/base_audio_model.py +6 -0
- camel/models/base_model.py +4 -0
- camel/models/deepseek_model.py +9 -3
- camel/models/gemini_model.py +9 -3
- camel/models/groq_model.py +9 -3
- camel/models/internlm_model.py +8 -2
- camel/models/model_factory.py +4 -0
- camel/models/moonshot_model.py +8 -2
- camel/models/nemotron_model.py +9 -3
- camel/models/nvidia_model.py +9 -3
- camel/models/ollama_model.py +9 -3
- camel/models/openai_audio_models.py +5 -3
- camel/models/openai_compatible_model.py +9 -3
- camel/models/openai_model.py +9 -3
- camel/models/openrouter_model.py +9 -3
- camel/models/qwen_model.py +9 -3
- camel/models/samba_model.py +9 -3
- camel/models/sglang_model.py +11 -4
- camel/models/siliconflow_model.py +8 -2
- camel/models/stub_model.py +2 -1
- camel/models/togetherai_model.py +9 -3
- camel/models/vllm_model.py +9 -3
- camel/models/yi_model.py +9 -3
- camel/models/zhipuai_model.py +9 -3
- camel/retrievers/auto_retriever.py +14 -0
- camel/storages/__init__.py +2 -0
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/tidb.py +332 -0
- camel/toolkits/__init__.py +7 -0
- camel/toolkits/browser_toolkit.py +84 -61
- camel/toolkits/openai_agent_toolkit.py +131 -0
- camel/toolkits/searxng_toolkit.py +207 -0
- camel/toolkits/thinking_toolkit.py +230 -0
- camel/types/enums.py +4 -0
- camel/utils/chunker/code_chunker.py +9 -15
- camel/verifiers/base.py +28 -5
- camel/verifiers/python_verifier.py +321 -68
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/METADATA +103 -8
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/RECORD +84 -75
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/licenses/LICENSE +0 -0
camel/environments/single_step.py
CHANGED

("…" marks removed-line text that the diff viewer did not preserve.)

@@ -12,12 +12,10 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 
-…
-from …
-from typing import Any, Dict, Optional, Tuple, Union
+import random
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from camel.datasets import BaseGenerator, DataPoint, StaticDataset
-from camel.extractors.base import BaseExtractor
 from camel.logger import get_logger
 from camel.verifiers.base import (
     BaseVerifier,

@@ -30,18 +28,23 @@ logger = get_logger(__name__)
 
 
 class SingleStepEnv:
-    r"""A …
+    r"""A lightweight environment for single-step RL with LLMs as policy.
+
+    This environment models a single interaction between an LLM-based agent
+    and a problem drawn from a dataset—such as a question-answering or
+    math problem—where the agent produces one response and receives feedback.
+
+    Core Flow:
+        - A question is sampled from a (possibly infinitely long) dataset.
+        - The LLM generates a single-step response (the action).
+        - The response is verified against the ground truth.
+        - A reward is computed based on correctness and optional custom logic.
 
     Key Features:
-        … (4 lines not preserved)
-        - Supports async setup, teardown, and cleanup of resources.
-        …
-    This class is intended as a foundation for RL experiments involving
-    LLM-based policies, ensuring structured interactions between model
-    actions and verification mechanisms.
+        - Batched evaluation with per-sample state tracking.
+        - Async setup and teardown for verifiers and related resources.
+        - Supports deterministic sampling via local RNG (optional seed).
+        - Extensible reward computation via subclassing.
     """
 
     PLACEHOLDER_OBS = Observation(

@@ -54,43 +57,47 @@ class SingleStepEnv:
         self,
         dataset: Union[StaticDataset, BaseGenerator],
         verifier: BaseVerifier,
-        extractor: BaseExtractor,
         **kwargs,
     ) -> None:
-        r"""Initialize the …
+        r"""Initialize the SingleStepEnv.
 
         Args:
-            dataset: Dataset to sample …
-            … (3 lines not preserved)
+            dataset (Union[StaticDataset, BaseGenerator]): Dataset to sample
+                problems from.
+            verifier (BaseVerifier): Verifier used to evaluate LLM responses
+                against ground-truth answers.
+            **kwargs: Optional metadata or configuration values.
+
+        Notes:
+            This class assumes all interactions are single-step: one question,
+            one LLM response, one reward.
         """
         self.dataset = dataset
         self.verifier = verifier
-        self.extractor = extractor
         self._metadata = kwargs
 
         # State tracking
         self._is_setup: bool = False
-        self.…
-        self.…
+        self._states: List[DataPoint] = []
+        self._states_done: List[bool] = []
+        self.current_batch_size: int = 0
 
     async def setup(self) -> None:
-        r"""Set up the environment by initializing the verifier …
+        r"""Set up the environment by initializing the verifier.
 
         This method ensures that the environment is ready for interaction.
-        It sets up necessary components, including the verifier …
+        It sets up necessary components, including the verifier.
 
         Raises:
            Exception: If setup fails due to an internal error.
        """
 
         if self._is_setup:
+            logger.warning("Environment has already been set up")
             return
 
         try:
             await self.verifier.setup()
-            await self.extractor.setup()
 
             self._is_setup = True
             logger.info('Environment setup completed successfully')

@@ -101,7 +108,7 @@ class SingleStepEnv:
     async def close(self) -> None:
         r"""Clean up and close all resources used by the environment.
 
-        This method shuts down the verifier …
+        This method shuts down the verifier, resets the internal
         state, and ensures that the environment is properly closed.
 
         Raises:

@@ -109,170 +116,323 @@ class SingleStepEnv:
         """
 
         if not self._is_setup:
+            logger.warning(
+                "Not closing environment - has not been set up yet."
+            )
             return
 
         try:
             self._is_setup = False
             await self.verifier.cleanup()
-            …
-            self.…
-            self.…
+            self._states = []
+            self._states_done = []
+            self.current_batch_size = 0
             logger.info('Environment closed successfully')
         except Exception as e:
             logger.error(f'Failed to close environment: {e}')
             raise
 
-    async def reset(…
-        …
+    async def reset(
+        self, batch_size: int = 1, seed: Optional[int] = None
+    ) -> Union[Observation, List[Observation]]:
+        r"""Resets the environment and starts a new episode.
 
-        This method samples a new data…
-        initial …
+        This method samples a new batch of data points from the dataset and
+        returns the corresponding initial observations.
+
+        If a seed is provided, a local random number generator is initialized
+        for deterministic sampling. The global random state is not affected.
+
+        Args:
+            batch_size (int): Number of data points to sample.
+                (default: :obj:`1`)
+            seed (Optional[int]): Seed for deterministic sampling. If None,
+                sampling is non-deterministic. (default: :obj:`None`)
 
         Returns:
-            Observation: …
-            …
+            Observation or List[Observation]: Initial observation(s) for the
+                episode.
 
         Raises:
-            …
+            RuntimeError: If called before all previous states are processed.
+            ValueError: If batch size exceeds dataset size.
+            TypeError: If the dataset is of an unsupported type.
         """
+        if batch_size <= 0:
+            raise ValueError("Batch size must be positive")
 
         if not self._is_setup:
+            logger.warning(
+                "reset() called on un-setup environment. Setting up..."
+            )
             await self.setup()
 
-        self.…
-        … (16 lines not preserved)
+        if self._batch_started() and not self._batch_done():
+            logger.error(
+                "Reset called before all states were processed. "
+                "Call step on remaining states first."
+            )
+            raise RuntimeError(
+                "reset() called before all states in batch were processed."
+            )
+
+        if seed is not None:
+            rng = random.Random(seed)
+        else:
+            rng = random.Random()
+
+        if isinstance(self.dataset, StaticDataset):
+            dataset_len = len(self.dataset)
+
+            if batch_size > dataset_len:
+                raise ValueError(
+                    f"Batch size {batch_size} is too large for dataset "
+                    f"of size {dataset_len}"
+                )
+
+            start_idx = rng.randint(0, dataset_len - batch_size)
+            idx_slice = slice(start_idx, start_idx + batch_size)
+            val = self.dataset[idx_slice]
+            self._states = [val] if isinstance(val, DataPoint) else val
+
+            self.current_batch_size = len(self._states)
+            self._states_done = [False] * self.current_batch_size
+
+            observations = [
+                Observation(question=sample.question, context={}, metadata={})
+                for sample in self._states
+            ]
+
+            return observations[0] if batch_size == 1 else observations
+
+        elif isinstance(self.dataset, BaseGenerator):
+            raise NotImplementedError(
+                "Reset not yet implemented for BaseGenerator datasets."
+            )
+
+        else:
+            raise TypeError(f"Unsupported dataset type: {type(self.dataset)}")
+
+    async def step(
+        self, action: Union[Action, List[Action]]
+    ) -> Union[
+        Tuple[Observation, float, bool, Dict[str, Any]],
+        List[Tuple[Observation, float, bool, Dict[str, Any]]],
+    ]:
+        r"""Process actions for a subset of states and update their finished
+        status.
 
         Args:
-            action …
-            …
+            action: Single action (for batch_size=1 or micro-batch of size 1)
+                or list of actions (for batch_size>=2 with multiple actions).
+                Each action must have an index for batch_size>=2, indicating
+                which state it corresponds to.
 
         Returns:
-            StepResult: …
-            …
-            … information.
+            Union[StepResult, List[StepResult]]: StepResult or list of
+                StepResults for the processed states.
 
         Raises:
-            RuntimeError: If …
-            …
+            RuntimeError: If environment isn't set up or episode has ended.
+            ValueError: If indices are invalid, duplicate, or correspond to
+                finished states.
         """
-        …
         if not self._is_setup:
             raise RuntimeError("Environment not set up. Call setup() first.")
-        if self.…
-            raise RuntimeError(…
-            …
+        if self._batch_done():
+            raise RuntimeError(
+                "Episodes have ended for batch. Call reset() first."
+            )
+        if not self._states:
             raise RuntimeError("No current observation. Call reset() first.")
 
-        # …
-        … (8 lines not preserved)
+        # Normalize actions into a list for uniform processing
+        if self.current_batch_size == 1:
+            if isinstance(action, list):
+                if len(action) != 1 or not isinstance(action[0], Action):
+                    raise ValueError(
+                        "For batch_size=1, expect a single Action or a "
+                        "list containing exactly one Action"
+                    )
+            elif not isinstance(action, Action):
+                raise ValueError(
+                    "For batch_size=1, expect a single Action or a "
+                    "list containing exactly one Action"
+                )
+            if isinstance(action, Action):
+                actions = [action]
+            else:
+                actions = action
+            if actions[0].index is None:
+                actions[0].index = 0
+            if actions[0].index != 0:
+                raise ValueError("For batch_size=1, index must be None or 0")
+
+        else:  # batch_size >= 2
+            if isinstance(action, Action):
+                if action.index is None:
+                    raise ValueError(
+                        "For batch_size>=2, each Action must have an index"
+                    )
+                if not isinstance(action.index, int):
+                    raise ValueError("Index must be an integer")
+                actions = [action]
+            elif isinstance(action, list):
+                if not action:  # Empty list
+                    raise ValueError("Action list cannot be empty")
+                actions = action
+                for act in actions:
+                    if not isinstance(act, Action):
+                        raise ValueError(
+                            "All elements in list must be Action objects"
+                        )
+                    if act.index is None:
+                        raise ValueError(
+                            "For batch_size>=2, each Action must have an index"
+                        )
+                    if not isinstance(act.index, int):
+                        raise ValueError("Index must be an integer")
+            else:
+                raise ValueError(
+                    "For batch_size>=2, expect an Action or list of Actions"
+                )
+
+        # Validate indices
+        indices: List[int] = []
+        for act in actions:
+            assert act.index is not None
+            indices.append(act.index)
+        if len(set(indices)) != len(indices):
+            raise ValueError("Duplicate state indices in actions.")
+        for idx in indices:
+            if idx < 0 or idx >= len(self._states):
+                raise ValueError(f"Invalid state index {idx}.")
+            if self._states_done[idx]:
+                raise ValueError(f"State at index {idx} is already finished.")
+
+        num_actions = len(actions)
+        if self.current_batch_size % num_actions != 0:
+            logger.warning(
+                f"Number of actions ({num_actions}) is not a divisor of "
+                f"total batch size ({self.current_batch_size})"
+            )
+
+        proposed_solutions = [act.llm_response for act in actions]
+        ground_truths: List[str] = []
+        for idx in indices:
+            ground_truths.append(self._states[idx].final_answer)
+
+        verification_results = await self.verifier.verify_batch(
+            solutions=proposed_solutions,
+            ground_truths=ground_truths,  # type: ignore [arg-type]
+            raise_on_error=True,
         )
 
-        …
-        …
-            action, extraction_result, verification_result
+        total_rewards, rewards_dicts = await self._compute_reward_batch(
+            proposed_solutions, verification_results
         )
 
-        … (15 lines not preserved)
+        # TODO Batch this
+        step_results = []
+        for i, action in enumerate(actions):
+            assert action.index is not None
+            idx = action.index
+            step_result = StepResult(
+                observation=self.PLACEHOLDER_OBS,
+                reward=total_rewards[i],
+                rewards_dict=rewards_dicts[i],
+                done=True,
+                info={
+                    "proposed_solution": proposed_solutions[i],
+                    "verification_result": verification_results[i],
+                    "state": self._states[idx],
+                },
+            )
+            step_results.append(step_result.as_tuple())
+            self._states_done[idx] = True
 
+        return step_results[0] if len(step_results) == 1 else step_results
+
+    async def _compute_reward_batch(
         self,
-        … (6 lines not preserved)
-        This method calculates the reward based on correctness and any
-        additional custom reward components.
+        proposed_solutions: List[str],
+        verification_results: List[VerificationResult],
+    ) -> Tuple[List[float], List[Dict[str, float]]]:
+        r"""Compute rewards for a batch of proposed solutions based on
+        verification results.
 
         Args:
-            … (4 lines not preserved)
-                the extracted response.
+            proposed_solutions (List[str]): List of LLM-generated responses to
+                evaluate.
+            verification_results (List[VerificationResult]): List of
+                verification outcomes for each solution.
 
         Returns:
-            Tuple …
-                - …
-                - …
-            …
-        Raises:
-            Exception: If an error occurs while computing rewards.
+            Tuple containing:
+                - List of total rewards for each solution.
+                - List of reward component dictionaries for each solution.
         """
+        if len(proposed_solutions) != len(verification_results):
+            raise ValueError(
+                f"Length mismatch: {len(proposed_solutions)} solutions vs "
+                f"{len(verification_results)} verification results"
+            )
 
-        …
+        total_rewards = []
+        rewards_dicts = []
 
-        …
-        …
-        )
+        for solution, verification_result in zip(
+            proposed_solutions, verification_results
+        ):
+            rewards: Dict[str, float] = {}
 
-        …
-        …
-        …
+            rewards["correctness"] = (
+                self.ACCURACY_REWARD if verification_result.status else 0.0
+            )
 
-        …
+            further_rewards = await self._compute_custom_reward(
+                solution, verification_result
+            )
+            rewards = {**rewards, **further_rewards}
 
-        …
+            total_reward = sum(rewards.values())
+            total_rewards.append(total_reward)
+            rewards_dicts.append(rewards)
+
+        return total_rewards, rewards_dicts
 
-    @abstractmethod
     async def _compute_custom_reward(
-        self,
-        action: Action,
-        extraction_result: str,
-        verification_result: VerificationResult,
+        self, proposed_solution: str, verification_result: VerificationResult
     ) -> Dict[str, float]:
-        r"""Compute additional custom reward components.
+        r"""Compute additional custom reward components for a single solution.
 
-        …
-        domain-specific reward calculations.
+        To be overridden by subclasses for domain-specific rewards.
 
         Args:
-            … (5 lines not preserved)
+            proposed_solution (str): The LLM-generated response.
+            verification_result (VerificationResult): The verification outcome.
+
+        Returns:
+            Dict[str, float]: Dictionary of custom reward components.
+        """
+        return {}
+
+    def _batch_done(self) -> bool:
+        r"""Check if all states in the current batch are done.
+
+        Returns:
+            bool: True if all states are marked as done, False otherwise.
+        """
+        return all(self._states_done)
+
+    def _batch_started(self) -> bool:
+        r"""Check if any state in the current batch is done.
 
         Returns:
-            …
-            …
+            bool: True if at least one state is marked as done, False
+                otherwise.
         """
-        …
+        return any(self._states_done)
 
     @property
     def metadata(self) -> Dict[str, Any]:
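Taken together, the single_step.py changes replace the old extractor-based, one-sample flow with a batched API: reset(batch_size=n, seed=...) samples n problems through a local RNG, and step() accepts an Action (or a micro-batch of indexed Actions), verifies each raw llm_response against its state's final_answer, and returns (observation, reward, done, info) tuples. A minimal usage sketch of the new flow follows; it assumes Action and SingleStepEnv are importable from camel.environments (they are defined in camel.environments.models and camel.environments.single_step), and the \boxed{42} response is a stand-in for a real LLM completion.

import asyncio

from camel.environments import Action, SingleStepEnv

async def run_batch(dataset, verifier):
    # dataset: StaticDataset, verifier: BaseVerifier (constructed elsewhere).
    env = SingleStepEnv(dataset, verifier)

    # Seeded reset: a local random.Random(seed) draws the batch, so the
    # global random state is untouched and the sample is reproducible.
    observations = await env.reset(batch_size=2, seed=42)

    # For batch_size >= 2, every Action must carry the index of the
    # state it answers.
    actions = [
        Action(index=i, llm_response=r"\boxed{42}")
        for i, _ in enumerate(observations)
    ]

    # step() verifies each response and returns one
    # (observation, reward, done, info) tuple per action.
    for obs, reward, done, info in await env.step(actions):
        print(reward, done, info["verification_result"].status)

    await env.close()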
camel/extractors/__init__.py
CHANGED

@@ -12,5 +12,20 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 from .base import BaseExtractor, BaseExtractorStrategy
+from .python_strategies import (
+    BoxedStrategy,
+    PythonDictStrategy,
+    PythonListStrategy,
+    PythonSetStrategy,
+    PythonTupleStrategy,
+)
 
-__all__ = […
+__all__ = [
+    "BaseExtractor",
+    "BaseExtractorStrategy",
+    "BoxedStrategy",
+    "PythonListStrategy",
+    "PythonDictStrategy",
+    "PythonSetStrategy",
+    "PythonTupleStrategy",
+]
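The new python_strategies module gives extractors concrete answer-parsing steps: a \boxed{...} grabber plus parsers for Python list, dict, set, and tuple literals. A sketch of how they might be composed, assuming BaseExtractor accepts a pipeline of stages (each stage a list of strategies) and exposes async setup/extract/cleanup as in CAMEL's extractor design; treat the exact constructor signature as an assumption.

import asyncio

from camel.extractors import BaseExtractor, BoxedStrategy, PythonListStrategy

async def demo() -> None:
    # Stage 1 pulls the \boxed{...} payload out of the raw response;
    # stage 2 then parses that payload as a Python list.
    extractor = BaseExtractor(
        pipeline=[[BoxedStrategy()], [PythonListStrategy()]]
    )
    await extractor.setup()

    extracted = await extractor.extract(r"Final answer: \boxed{[1, 2, 3]}")
    print(extracted)

    await extractor.cleanup()

asyncio.run(demo())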
camel/interpreters/docker_interpreter.py
CHANGED

@@ -210,7 +210,7 @@ class DockerInterpreter(BaseInterpreter):
         if self.require_confirm:
             logger.info(
                 f"The following {code_type} code will run on your "
-                "computer: {code}"
+                f"computer: {code}"
             )
             while True:
                 choice = input("Running code? [Y/n]:").lower()
camel/interpreters/e2b_interpreter.py
CHANGED

@@ -99,7 +99,7 @@ class E2BInterpreter(BaseInterpreter):
         if self.require_confirm:
             logger.info(
                 f"The following {code_type} code will run on your "
-                "e2b sandbox: {code}"
+                f"e2b sandbox: {code}"
             )
             while True:
                 choice = input("Running code? [Y/n]:").lower()
camel/interpreters/subprocess_interpreter.py
CHANGED

@@ -292,7 +292,7 @@ class SubprocessInterpreter(BaseInterpreter):
         if self.require_confirm:
             logger.info(
                 f"The following {code_type} code will run on your "
-                "computer: {code}"
+                f"computer: {code}"
             )
             while True:
                 choice = input("Running code? [Y/n]:").lower().strip()
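All three interpreter hunks fix the same bug: with Python's implicit string concatenation, the f prefix applies to each literal separately, so the second fragment was logged with a literal {code} placeholder instead of the actual code. A standalone illustration:

code_type = "python"
code = "print('hello')"

# Before the fix: only the first literal is an f-string, so {code}
# is not interpolated and shows up verbatim in the log message.
broken = f"The following {code_type} code will run on your " "computer: {code}"
print(broken)  # ... run on your computer: {code}

# After the fix: prefixing the second literal too interpolates the code.
fixed = f"The following {code_type} code will run on your " f"computer: {code}"
print(fixed)   # ... run on your computer: print('hello')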
camel/loaders/__init__.py
CHANGED

@@ -18,7 +18,7 @@ from .chunkr_reader import ChunkrReader
 from .firecrawl_reader import Firecrawl
 from .jina_url_reader import JinaURLReader
 from .mineru_extractor import MinerU
-from .…
+from .pandas_reader import PandasReader
 from .unstructured_io import UnstructuredIO
 
 __all__ = [

@@ -30,6 +30,6 @@ __all__ = [
     'Firecrawl',
     'Apify',
     'ChunkrReader',
-    '…
+    'PandasReader',
     'MinerU',
 ]
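The reader module is renamed from panda_reader.py to pandas_reader.py and the package now exports PandasReader, so imports against the old module path break on upgrade:

# 0.2.38: the reader lives in the renamed module and is exported
# from the package root.
from camel.loaders import PandasReader
# or: from camel.loaders.pandas_reader import PandasReader

# 0.2.36-style imports no longer resolve:
# from camel.loaders.panda_reader import ...  # ModuleNotFoundError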