camel-ai 0.2.37__py3-none-any.whl → 0.2.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (55)
  1. camel/__init__.py +1 -1
  2. camel/datagen/evol_instruct/__init__.py +20 -0
  3. camel/datagen/evol_instruct/evol_instruct.py +424 -0
  4. camel/datagen/evol_instruct/scorer.py +166 -0
  5. camel/datagen/evol_instruct/templates.py +268 -0
  6. camel/environments/models.py +10 -4
  7. camel/environments/single_step.py +91 -17
  8. camel/interpreters/docker_interpreter.py +1 -1
  9. camel/interpreters/e2b_interpreter.py +1 -1
  10. camel/interpreters/subprocess_interpreter.py +1 -1
  11. camel/loaders/__init__.py +2 -2
  12. camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
  13. camel/memories/context_creators/score_based.py +198 -67
  14. camel/models/aiml_model.py +9 -3
  15. camel/models/anthropic_model.py +11 -3
  16. camel/models/azure_openai_model.py +9 -3
  17. camel/models/base_audio_model.py +6 -0
  18. camel/models/base_model.py +4 -0
  19. camel/models/deepseek_model.py +9 -3
  20. camel/models/gemini_model.py +9 -3
  21. camel/models/groq_model.py +9 -3
  22. camel/models/internlm_model.py +8 -2
  23. camel/models/model_factory.py +4 -0
  24. camel/models/moonshot_model.py +8 -2
  25. camel/models/nemotron_model.py +9 -3
  26. camel/models/nvidia_model.py +9 -3
  27. camel/models/ollama_model.py +9 -3
  28. camel/models/openai_audio_models.py +5 -3
  29. camel/models/openai_compatible_model.py +9 -3
  30. camel/models/openai_model.py +9 -3
  31. camel/models/openrouter_model.py +9 -3
  32. camel/models/qwen_model.py +9 -3
  33. camel/models/samba_model.py +9 -3
  34. camel/models/sglang_model.py +11 -4
  35. camel/models/siliconflow_model.py +8 -2
  36. camel/models/stub_model.py +2 -1
  37. camel/models/togetherai_model.py +9 -3
  38. camel/models/vllm_model.py +9 -3
  39. camel/models/yi_model.py +9 -3
  40. camel/models/zhipuai_model.py +9 -3
  41. camel/retrievers/auto_retriever.py +14 -0
  42. camel/storages/__init__.py +2 -0
  43. camel/storages/vectordb_storages/__init__.py +2 -0
  44. camel/storages/vectordb_storages/tidb.py +332 -0
  45. camel/toolkits/__init__.py +5 -0
  46. camel/toolkits/browser_toolkit.py +84 -61
  47. camel/toolkits/openai_agent_toolkit.py +131 -0
  48. camel/toolkits/searxng_toolkit.py +207 -0
  49. camel/toolkits/thinking_toolkit.py +168 -12
  50. camel/types/enums.py +1 -0
  51. camel/verifiers/python_verifier.py +12 -4
  52. {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/METADATA +52 -4
  53. {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/RECORD +55 -48
  54. {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/WHEEL +0 -0
  55. {camel_ai-0.2.37.dist-info → camel_ai-0.2.38.dist-info}/licenses/LICENSE +0 -0
camel/__init__.py CHANGED
@@ -14,7 +14,7 @@
  
  from camel.logger import disable_logging, enable_logging, set_log_level
  
- __version__ = '0.2.37'
+ __version__ = '0.2.38'
  
  __all__ = [
      '__version__',
camel/datagen/evol_instruct/__init__.py ADDED
@@ -0,0 +1,20 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+ from .evol_instruct import EvolInstructPipeline
+
+ __all__ = [
+     'EvolInstructPipeline',
+     'MathEvolInstructTemplates',
+ ]
camel/datagen/evol_instruct/evol_instruct.py ADDED
@@ -0,0 +1,424 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+ import random
+ import time
+ from concurrent.futures import ThreadPoolExecutor
+ from math import ceil
+ from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast
+
+ from tqdm import tqdm
+
+ from camel.agents import ChatAgent
+ from camel.datagen.evol_instruct.scorer import BaseScorer, GeneralScorer
+ from camel.datagen.evol_instruct.templates import EvolInstructTemplates
+ from camel.logger import get_logger
+
+ logger = get_logger(__name__)
+
+
+ class EvolInstructPipeline:
+     r"""Pipeline for evolving prompts using the Evol-Instruct methodology.
+
+     Supports custom templates defining evolution strategies and methods. The
+     pipeline leverages language models to iteratively refine prompts through
+     specified evolution strategies.
+
+     Args:
+         templates (Type[EvolInstructTemplates]): Template class containing
+             evolution strategy and method definitions. Must provide
+             `EVOL_METHODS` and `STRATEGY` attributes.
+             (default: :obj:`EvolInstructTemplates`)
+         agent (Optional[ChatAgent]): Chat agent instance for LLM interaction.
+             If :obj:`None`, initializes with a default ChatAgent.
+             (default: :obj:`None`)
+     """
+
+     def __init__(
+         self,
+         templates: Type = EvolInstructTemplates,
+         agent: Optional[ChatAgent] = None,
+     ) -> None:
+         r"""Initialize pipeline with templates and language model agent.
+
+         Args:
+             templates (Type[EvolInstructTemplates]): Template class containing
+                 evolution strategy configurations.
+                 (default: :obj:`EvolInstructTemplates`)
+             agent (Optional[ChatAgent]): Preconfigured chat agent instance.
+                 Creates a default ChatAgent if not provided.
+                 (default: :obj:`None`)
+         """
+         self.templates = templates
+         self.agent = agent or ChatAgent()
+
+     def _resolve_evolution_method(self, method_key: str) -> str:
+         r"""Resolve evolution method key to concrete implementation.
+
+         Args:
+             method_key (str): Input method identifier. Can be:
+                 - Direct method key from templates.EVOL_METHODS
+                 - Strategy name from templates.STRATEGY keys
+
+         Returns:
+             str: Resolved method key from EVOL_METHODS
+         """
+         if method_key in self.templates.EVOL_METHODS:
+             return method_key
+         if method_key.upper() in self.templates.STRATEGY:
+             strategy = self.templates.STRATEGY[method_key.upper()]
+             strategy_methods = strategy["methods"]
+             return random.choice(strategy_methods)
+
+         logger.warning(
+             f"Invalid evolution method: {method_key}. "
+             f"Using random selection."
+         )
+         return random.choice(list(self.templates.EVOL_METHODS))
+
+     def _get_evolution_methods(
+         self,
+         method: Union[str, List[str]],
+         num_generations: int = 2,
+     ) -> List[str]:
+         r"""Get list of evolution methods based on input specification.
+
+         Args:
+             method (Union[str, List[str]]): Specification for method selection.
+                 Can be:
+                 - Strategy name for methods from that strategy
+                 - Specific method name
+                 - List of method specifications
+             num_generations (int): Number of methods to return.
+
+         Returns:
+             List[str]: List of resolved method names
+         """
+         candidate_methods = []
+
+         if isinstance(method, list):
+             for method_spec in method:
+                 candidate_methods.append(
+                     self._resolve_evolution_method(method_spec)
+                 )
+         elif isinstance(method, str):
+             if method.upper() in self.templates.STRATEGY:
+                 strategy = self.templates.STRATEGY[method.upper()]
+                 candidate_methods = strategy["methods"]
+             else:
+                 candidate_methods = [self._resolve_evolution_method(method)]
+
+         # Remove duplicates while preserving order
+         unique_candidates = []
+         for method_name in candidate_methods:
+             if method_name not in unique_candidates:
+                 unique_candidates.append(method_name)
+
+         if len(unique_candidates) >= num_generations:
+             methods = random.sample(unique_candidates, num_generations)
+         else:
+             methods = unique_candidates.copy()
+             while len(methods) < num_generations:
+                 methods.append(random.choice(unique_candidates))
+
+         return methods
+
+     def _generate_single_evolution(
+         self,
+         prompt: str,
+         method: str,
+         return_method: bool = False,
+     ) -> Tuple[str, str]:
+         r"""Generate a single evolved prompt from a seed prompt.
+
+         Args:
+             prompt (str): The seed prompt to evolve.
+             method (str): The evolution method key to use.
+             return_method (bool): If True, returns method along with prompt.
+
+         Returns:
+             Tuple[str, str]: Evolved prompt and method
+         """
+         resolved_method = self._resolve_evolution_method(method)
+
+         # Find strategy containing the resolved method
+         strategy_key = None
+         for strategy, group in self.templates.STRATEGY.items():
+             if resolved_method in group["methods"]:
+                 strategy_key = strategy
+                 break
+
+         if strategy_key is None:
+             strategy_key = random.choice(list(self.templates.STRATEGY.keys()))
+
+         strategy = self.templates.STRATEGY[strategy_key]
+         instruction_template = strategy["meta_instruction"]
+         instruction = instruction_template.format(
+             method=self.templates.EVOL_METHODS.get(
+                 resolved_method,
+                 random.choice(list(self.templates.EVOL_METHODS.values())),
+             ),
+             prompt=prompt,
+         )
+
+         self.agent.reset()
+         response = self.agent.step(instruction)
+         evolved_prompt = response.msgs[0].content.strip()
+
+         if return_method:
+             return (evolved_prompt, resolved_method)
+         else:
+             return (evolved_prompt, "")
+
+     def _generate_multiple_evolutions(
+         self,
+         prompt: str,
+         method: Union[str, List[str]],
+         num_generations: int = 2,
+         keep_original: bool = True,
+         num_threads: int = 10,
+     ) -> List[Tuple[str, str]]:
+         r"""Generate multiple evolved versions of a prompt.
+
+         Args:
+             prompt (str): Seed prompt to evolve.
+             method (Union[str, List[str]]): Evolution method specification.
+             num_generations (int): Candidates to generate per iteration.
+             keep_original (bool): Whether to keep the original prompt.
+             num_threads (int): Number of threads for parallel processing.
+
+         Returns:
+             List[Tuple[str, str]]: List of (evolved_prompt, method) pairs
+         """
+         results = [(prompt, "original")] if keep_original else []
+
+         if isinstance(method, list) and len(method) == num_generations:
+             candidate_methods = method
+         else:
+             candidate_methods = self._get_evolution_methods(
+                 method=method, num_generations=num_generations
+             )
+
+         def _process_single_method(method_name: str) -> Tuple[str, str]:
+             return self._generate_single_evolution(
+                 prompt, method_name, return_method=True
+             )
+
+         with ThreadPoolExecutor(max_workers=num_threads) as executor:
+             evolved_results = list(
+                 executor.map(_process_single_method, candidate_methods)
+             )
+
+         results.extend(evolved_results)
+         return results
+
+     def _generate_iterative_evolutions(
+         self,
+         prompt: str,
+         evolution_spec: Union[str, List[Union[str, List[str]]]],
+         num_generations: int = 2,
+         num_iterations: Optional[int] = None,
+         keep_original: bool = True,
+         scorer: Optional[BaseScorer] = None,
+         num_threads: int = 10,
+     ) -> Dict[int, List[Dict[str, Any]]]:
+         r"""Generate iterative evolutions of a prompt with scoring.
+
+         Args:
+             prompt (str): Seed prompt to evolve.
+             evolution_spec (Union[str, List[Union[str, List[str]]]]):
+                 Evolution method specification.
+                 If a list is provided and num_iterations is None, then
+                 num_iterations is set to the length of the list.
+             num_generations (int): Candidates to generate per iteration.
+             num_iterations (Optional[int]): Number of evolution iterations.
+                 Defaults to the length of evolution_spec.
+             keep_original (bool): Include original prompt in results.
+             scorer (Optional[BaseScorer]): Scoring model for candidate.
+             num_threads (int): Number of threads for parallel processing.
+
+         Returns:
+             Dict[int, List[Dict[str, Any]]]: Evolution results per iteration,
+                 where each candidate is represented as a dict with keys:
+                 "instruction", "method", and "scores".
+         """
+         if num_iterations is None:
+             if isinstance(evolution_spec, list):
+                 num_iterations = len(evolution_spec)
+             else:
+                 num_iterations = 1
+
+         results = {}
+         current_prompt = prompt
+         scorer = scorer or GeneralScorer()
+
+         for iteration in range(num_iterations):
+             if isinstance(evolution_spec, list):
+                 if iteration < len(evolution_spec):
+                     iteration_spec = evolution_spec[iteration]
+                 else:
+                     iteration_spec = evolution_spec[-1]
+             else:
+                 iteration_spec = evolution_spec
+
+             batch_results = self._generate_multiple_evolutions(
+                 prompt=current_prompt,
+                 method=iteration_spec,
+                 num_generations=num_generations,
+                 keep_original=False,
+                 num_threads=num_threads,
+             )
+
+             scored_results = []
+             for candidate, method_used in batch_results:
+                 scores = scorer.score(current_prompt, candidate)
+                 scored_results.append(
+                     {
+                         "instruction": candidate,
+                         "method": method_used,
+                         "scores": scores,
+                     }
+                 )
+
+             best_index = max(
+                 range(len(scored_results)),
+                 key=lambda i: sum(
+                     cast(Dict[str, int], scored_results[i]["scores"]).values()
+                 ),
+             )
+
+             best_candidate = cast(
+                 str, scored_results[best_index]["instruction"]
+             )
+
+             if keep_original:
+                 results[iteration] = [
+                     {
+                         "instruction": current_prompt,
+                         "method": "original",
+                         "scores": {},
+                     },
+                     *scored_results,
+                 ]
+             else:
+                 results[iteration] = scored_results
+
+             current_prompt = best_candidate
+
+         return results
+
+     def generate(
+         self,
+         prompts: List[str],
+         evolution_spec: Union[str, List[Union[str, List[str]]]],
+         num_generations: int = 2,
+         num_iterations: Optional[int] = None,
+         keep_original: bool = True,
+         scorer: Optional[BaseScorer] = None,
+         num_chunks: int = 1,
+         retry_limit: int = 3,
+         retry_delay: float = 1.0,
+         num_threads: int = 10,
+     ) -> List[Dict[int, List[Dict[str, Any]]]]:
+         r"""Evolve a batch of prompts through iterative refinement.
+
+         Args:
+             prompts (List[str]): Seed prompts to evolve.
+             evolution_spec (Union[str, List[Union[str, List[str]]]]):
+                 Evolution method specification.
+                 If a list is provided and num_iterations is None, then
+                 num_iterations is set to the length of the list.
+             num_generations (int): Candidates to generate per iteration.
+             num_iterations (Optional[int]): Number of evolution iterations.
+                 Defaults to the length of evolution_spec.
+             keep_original (bool): Include original prompts in results.
+             scorer (Optional[BaseScorer]): Scoring model for candidate.
+             num_chunks (int): Number of parallel processing chunks.
+             retry_limit (int): Max retries for failed generations.
+             retry_delay (float): Delay between retries in seconds.
+             num_threads (int): Number of threads for parallel processing.
+
+         Returns:
+             List[Dict[int, List[Dict[str, Any]]]]: Evolution results.
+         """
+         if num_iterations is None:
+             if isinstance(evolution_spec, list):
+                 num_iterations = len(evolution_spec)
+             else:
+                 num_iterations = 1
+
+         evolution_plan: List[List[List[str]]] = []
+         for _ in prompts:
+             prompt_plan = []
+             for iteration in range(num_iterations):
+                 if isinstance(evolution_spec, list):
+                     if iteration < len(evolution_spec):
+                         raw_spec = evolution_spec[iteration]
+                     else:
+                         raw_spec = evolution_spec[-1]
+                 else:
+                     raw_spec = evolution_spec
+                 prompt_plan.append(
+                     self._get_evolution_methods(raw_spec, num_generations)
+                 )
+             evolution_plan.append(prompt_plan)
+
+         def _process_prompt(
+             args: Tuple[str, List[List[str]]],
+         ) -> Dict[int, List[Dict[str, Any]]]:
+             prompt, methods = args
+             retries = 0
+             while retries <= retry_limit:
+                 try:
+                     return self._generate_iterative_evolutions(
+                         prompt=prompt,
+                         evolution_spec=evolution_spec,
+                         num_generations=num_generations,
+                         num_iterations=num_iterations,
+                         keep_original=keep_original,
+                         scorer=scorer,
+                         num_threads=num_threads,
+                     )
+                 except Exception as e:
+                     retries += 1
+                     if retries <= retry_limit:
+                         logger.warning(
+                             f"Error processing prompt "
+                             f"(attempt {retries}/{retry_limit}): {e!s}"
+                         )
+                         time.sleep(retry_delay)
+                     else:
+                         logger.error("Failed to process prompt.")
+                         return {}
+
+             raise RuntimeError("_process_prompt() did not return.")
+
+         num_chunks = max(1, min(num_chunks, len(prompts)))
+         chunk_size = ceil(len(prompts) / num_chunks)
+         results = []
+
+         for chunk_idx in range(0, len(prompts), chunk_size):
+             chunk = prompts[chunk_idx : chunk_idx + chunk_size]
+             plan_chunk = evolution_plan[chunk_idx : chunk_idx + chunk_size]
+
+             with ThreadPoolExecutor(max_workers=num_threads) as executor:
+                 chunk_results = list(
+                     tqdm(
+                         executor.map(_process_prompt, zip(chunk, plan_chunk)),
+                         total=len(chunk),
+                     )
+                 )
+             results.extend(chunk_results)
+
+         return results
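
For orientation, a minimal usage sketch of the new EvolInstructPipeline, inferred only from the code added above. The strategy key "DEPTH" is a hypothetical placeholder (the real keys live in EvolInstructTemplates.STRATEGY in templates.py, which is not reproduced in this diff), and a configured model backend for the default ChatAgent is assumed.

    from camel.agents import ChatAgent
    from camel.datagen.evol_instruct import EvolInstructPipeline
    from camel.datagen.evol_instruct.scorer import GeneralScorer

    # Build the pipeline; passing an explicit ChatAgent controls which model backend is used.
    pipeline = EvolInstructPipeline(agent=ChatAgent())

    # "DEPTH" is a hypothetical strategy key; consult templates.STRATEGY for the real ones.
    results = pipeline.generate(
        prompts=["Write a function that reverses a string."],
        evolution_spec="DEPTH",
        num_generations=2,
        num_iterations=2,
        scorer=GeneralScorer(),
    )

    # One dict per seed prompt, mapping iteration index to scored candidates.
    for iteration, candidates in results[0].items():
        for candidate in candidates:
            print(iteration, candidate["method"], candidate["scores"])
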
camel/datagen/evol_instruct/scorer.py ADDED
@@ -0,0 +1,166 @@
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+ import json
+ from abc import ABC, abstractmethod
+ from typing import Dict, Optional
+
+ from pydantic import BaseModel, Field
+
+ from camel.agents import ChatAgent
+ from camel.logger import get_logger
+
+ logger = get_logger(__name__)
+
+
+ class BaseScorer(ABC):
+     @abstractmethod
+     def score(
+         self, reference_prompt: str, candidate_prompt: str
+     ) -> Dict[str, int]:
+         r"""Compare a candidate prompt against a reference prompt and
+         return a tuple of scores. The higher the score, the better.
+         For example, (diversity, difficulty, feasibility).
+         """
+         pass
+
+
+ class MathScorer(BaseScorer):
+     def __init__(self, agent: Optional[ChatAgent] = None):
+         self.system_msg = (
+             "You are an evaluator for math problems. Your task is to compare "
+             "a new math problem against a reference math problem, and rate it "
+             "in **four dimensions**, each scored from 1 to 5.\n\n"
+             "1. Diversity (1-5): How novel is the new problem compared to the "
+             "reference? 1 = very similar, 5 = completely different.\n"
+             "2. Difficulty (1-5): Rate the relative difficulty compared to the"
+             " reference problem. 1 = much less difficult, "
+             "3 = similar difficulty, 5 = much more difficult.\n"
+             "3. Validity (1-5): How well-defined and sound is the problem?"
+             "1 = very vague or flawed, 5 = very clear and rigorous.\n"
+             "4. Solvability (1-5): How likely is the problem solvable using "
+             "standard math techniques? 1 = very unsolvable or ambiguous, "
+             "5 = very clearly solvable.\n\n"
+             "Respond with a JSON object like: "
+             "{ \"diversity\": ..., \"difficulty\": ..., "
+             "\"validity\": ..., \"solvability\": ... }"
+         )
+         self.agent = agent or ChatAgent(self.system_msg)
+
+     class MathScoreSchema(BaseModel):
+         diversity: int = Field(
+             ...,
+             description=(
+                 "Score for the diversity of the math problem "
+                 "compared to the reference"
+             ),
+         )
+         difficulty: int = Field(
+             ..., description="Score for the relative difficulty"
+         )
+         validity: int = Field(
+             ...,
+             description="Score for how well-defined and sound the problem is",
+         )
+         solvability: int = Field(
+             ...,
+             description="Score for the solvability of the problem",
+         )
+
+     def score(
+         self, reference_problem: str, new_problem: str
+     ) -> Dict[str, int]:
+         r"""Evaluates the new math problem relative to the reference math
+         problem.
+
+         Args:
+             reference_problem (str): The reference math problem.
+             new_problem (str): The new or evolved math problem.
+
+         Returns:
+             Dict[str, int]: A dictionary with scores for diversity, difficulty,
+                 validity, and solvability.
+         """
+         query = (
+             f"Reference problem:\n{reference_problem}\n\n"
+             f"New problem:\n{new_problem}\n\n"
+             "Provide scores in JSON format."
+         )
+         response = self.agent.step(query, response_format=self.MathScoreSchema)
+         score_data = json.loads(response.msg.content)
+         return score_data
+
+
+ class GeneralScorer(BaseScorer):
+     def __init__(self, agent: Optional[ChatAgent] = None):
+         self.system_msg = (
+             "You are an evaluator for problems in various domains. Your task "
+             "is to compare a new problem against a reference problem, and rate"
+             " it in **three dimensions**, each scored from 1 to 5.\n\n"
+             "1. Diversity (1-5): How novel is the new problem compared to the "
+             "reference? 1 = very similar, 5 = completely different.\n"
+             "2. Complexity (1-5): Relative to the reference problem. "
+             "1 = much less complex, 3 = similar complexity, "
+             "5 = much more complex.\n"
+             "3. Validity (1-5): How well-defined, meaningful, the problem is."
+             "1 = vague/flawed, 5 = precise and fully meaningful.\n"
+             "Respond with a JSON object like: "
+             "{ \"diversity\": ..., \"complexity\": ..., \"validity\": ... }"
+         )
+         self.agent = agent or ChatAgent(self.system_msg)
+
+     class GeneralScoreSchema(BaseModel):
+         diversity: int = Field(
+             ...,
+             description=(
+                 "Score for the diversity of the problem "
+                 "compared to the reference."
+             ),
+         )
+         complexity: int = Field(
+             ...,
+             description=("Score for the relative complexity of the problem."),
+         )
+         validity: int = Field(
+             ...,
+             description=(
+                 "Score estimating the likelihood that the problem is "
+                 "well-defined."
+             ),
+         )
+
+     def score(
+         self, reference_problem: str, new_problem: str
+     ) -> Dict[str, int]:
+         r"""Evaluates the new problem against the reference problem using
+         structured scoring.
+
+         Args:
+             reference_problem (str): The original problem.
+             new_problem (str): The evolved or new problem.
+
+         Returns:
+             Dict[str, int]: A dictionary with scores for diversity, complexity,
+                 and validity.
+         """
+         query = (
+             f"Reference problem:\n{reference_problem}\n\n"
+             f"New problem:\n{new_problem}\n\n"
+             "Provide scores in JSON format."
+         )
+         response = self.agent.step(
+             query, response_format=self.GeneralScoreSchema
+         )
+         score_data = json.loads(response.msg.content)
+         return score_data
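
A short sketch of calling the new scorers directly, again assuming credentials for the default ChatAgent are configured; the example problems are illustrative only.

    from camel.datagen.evol_instruct.scorer import GeneralScorer, MathScorer

    # GeneralScorer returns {"diversity": ..., "complexity": ..., "validity": ...}
    general = GeneralScorer()
    print(
        general.score(
            "Sum the integers from 1 to 100.",
            "Sum the squares of the integers from 1 to 100.",
        )
    )

    # MathScorer adds difficulty and solvability dimensions.
    math_scorer = MathScorer()
    print(
        math_scorer.score(
            "Solve x + 2 = 5.",
            "Solve the system x + y = 5 and x - y = 1.",
        )
    )
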