camel-ai 0.2.49__py3-none-any.whl → 0.2.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +159 -15
- camel/configs/__init__.py +6 -0
- camel/configs/modelscope_config.py +4 -1
- camel/configs/novita_config.py +102 -0
- camel/configs/qwen_config.py +0 -7
- camel/configs/watsonx_config.py +96 -0
- camel/models/__init__.py +4 -0
- camel/models/model_factory.py +29 -6
- camel/models/modelscope_model.py +175 -2
- camel/models/novita_model.py +95 -0
- camel/models/qwen_model.py +175 -2
- camel/models/watsonx_model.py +253 -0
- camel/societies/workforce/prompts.py +31 -4
- camel/societies/workforce/workforce.py +1 -1
- camel/toolkits/browser_toolkit.py +53 -55
- camel/types/enums.py +223 -1
- camel/types/unified_model_type.py +10 -0
- camel/utils/__init__.py +2 -0
- camel/utils/filename.py +80 -0
- camel/verifiers/__init__.py +2 -0
- camel/verifiers/physics_verifier.py +881 -0
- camel/verifiers/python_verifier.py +16 -31
- {camel_ai-0.2.49.dist-info → camel_ai-0.2.50.dist-info}/METADATA +4 -1
- {camel_ai-0.2.49.dist-info → camel_ai-0.2.50.dist-info}/RECORD +27 -21
- {camel_ai-0.2.49.dist-info → camel_ai-0.2.50.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.49.dist-info → camel_ai-0.2.50.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,881 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import math
|
|
17
|
+
import os
|
|
18
|
+
import re
|
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
20
|
+
|
|
21
|
+
from camel.extractors.base import BaseExtractor
|
|
22
|
+
from camel.logger import get_logger
|
|
23
|
+
|
|
24
|
+
from .models import VerificationOutcome, VerificationResult
|
|
25
|
+
from .python_verifier import PythonVerifier
|
|
26
|
+
|
|
27
|
+
# Module-level logger used by the classes defined in this file.
logger = get_logger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class UnitParser:
    r"""Class for handling unit parsing and manipulation operations.

    The parser keeps a name-to-unit lookup table in ``allowed_units``. The
    table is seeded with every ``Quantity`` exposed by
    ``sympy.physics.units``, extended with a few extra spellings (``hrs``,
    ``Joules``, ``%`` ...) and finally with SI-prefixed variants of all of
    those names.
    """

    def __init__(self):
        from sympy.physics import units

        # Extra spellings accepted on top of the names exported by sympy
        extra_allowed_units = {
            'hrs': units.hour,
            'min': units.minute,
            'Joule': units.joule,
            'Joules': units.joule,
            'circ': units.degree,
            "Omega": units.ohm,
            '%': units.Unit('percent'),
        }

        self.allowed_units: Dict[str, Any] = self._load_sympy_units()
        self.allowed_units.update(extra_allowed_units)

        # Add SI prefixed units
        self._add_si_prefixes()

    @staticmethod
    def _load_sympy_units() -> Dict[str, Any]:
        r"""Load all available units from sympy.physics.units.

        Returns:
            Dict[str, Any]: Dictionary mapping unit names to their
                corresponding sympy Quantity objects.
        """
        from sympy.physics import units

        candidates = ((name, getattr(units, name)) for name in dir(units))
        return {
            name: obj
            for name, obj in candidates
            if isinstance(obj, units.Quantity)
        }

    def _add_si_prefixes(self):
        r"""Add SI prefixed units (like km, MHz, etc.) to the allowed units.

        Every prefix in sympy's ``PREFIXES`` table is combined with every
        unit name known so far. A generated name is only added when it does
        not collide with a name that is already registered.
        """
        from sympy.physics.units.prefixes import PREFIXES

        # Snapshot once: the table is not modified while names are generated
        base_units = dict(self.allowed_units)

        prefixed_units = {}
        for prefix, prefix_obj in PREFIXES.items():
            for unit_name, base_unit in base_units.items():
                # Example: "MJ", "kN"
                prefixed_units[f"{prefix}{unit_name}"] = (
                    prefix_obj.scale_factor * base_unit
                )

        # Add only new prefixed units that don't conflict with existing ones
        for name, unit in prefixed_units.items():
            self.allowed_units.setdefault(name, unit)

    def parse_unit(self, unit_str: str) -> Optional[Any]:
        r"""Parse a unit string into a SymPy expression using the appropriate
        method.

        Strings containing ``$`` or a backslash are handed to
        :meth:`parse_unit_with_latex`; everything else is preprocessed and
        parsed with ``parse_expr`` against ``allowed_units``.

        Args:
            unit_str (str): The unit string to parse.

        Returns:
            Optional[Any]: SymPy expression representing the unit, or None
                if parsing fails or the unit is dimensionless. For LaTeX
                input the result of :meth:`parse_unit_with_latex` is
                returned unchanged.
        """

        import sympy as sp
        from sympy.parsing.sympy_parser import parse_expr

        if not unit_str or unit_str == "dimensionless":
            return None

        if "$" in unit_str or "\\" in unit_str:
            # Likely a LaTeX formatted string
            return self.parse_unit_with_latex(unit_str)

        # Standard unit string
        processed_str = self.preprocess_unit_string(unit_str)

        try:
            # NOTE(security): parse_expr evaluates the string with Python's
            # eval, so unit strings should come from a trusted source.
            expr = parse_expr(
                processed_str, local_dict=self.allowed_units, evaluate=True
            )
            return sp.simplify(expr)
        except Exception as e:
            logger.info(
                f"Failed to parse unit '{unit_str}' "
                f"(processed as '{processed_str}'): {e}"
            )
            return None

    def parse_unit_with_latex(self, unit_str: str) -> Any:
        r"""Parse a unit string using SymPy's LaTeX parser.

        Args:
            unit_str (str): The unit string in LaTeX format.

        Returns:
            Any: SymPy expression representing the unit, or the cleaned
                unit string if the LaTeX parser fails.
        """

        import sympy as sp
        from sympy.parsing.latex import parse_latex

        # Clean the LaTeX string
        unit_str = unit_str.strip().lstrip("$").rstrip("$").lstrip("^")
        # \mathrm{kg} is rewritten to {\kg}
        unit_str = re.sub(r'\\mathrm\{([^}]*)\}', r'{\\\1}', unit_str)
        unit_str = re.sub(r'\\text\{(.*?)\}', r'\1', unit_str)
        unit_str = unit_str.replace('~', '')

        try:
            expr = parse_latex(unit_str)
            logger.info(f"Parsed LaTeX unit: {expr}.")
        except Exception as e:
            logger.warning(f"Failed to parse LaTeX unit '{unit_str}': {e}")
            return unit_str

        # Substitute allowed unit symbols. Only symbols that actually occur
        # in the expression are looked up, instead of attempting one
        # substitution per entry of the (large) unit table.
        symbol_names = {
            getattr(sym, 'name', None) for sym in expr.free_symbols
        }
        replacements = {
            sp.Symbol(name): self.allowed_units[name]
            for name in symbol_names
            if name in self.allowed_units
        }
        if replacements:
            expr = expr.subs(replacements)

        simplified_expr = sp.simplify(expr)
        logger.info(f"Simplified LaTeX unit: {simplified_expr}")
        return simplified_expr

    def detect_scaling_factor(
        self, unit_expr: Any
    ) -> Tuple[Union[int, float, Any], Any]:
        r"""Detect a scaling factor in the unit expression.

        Args:
            unit_expr (Any): The unit expression.

        Returns:
            Tuple[Union[int, float, Any], Any]: Tuple of scale
                factor and base unit. If the extracted factor is not
                numeric, ``(1, unit_expr)`` is returned.
        """

        import sympy as sp

        value, base_unit = self.extract_value_and_unit(unit_expr)

        if isinstance(value, (int, float, sp.Number)):
            return value, base_unit
        return 1, unit_expr

    @staticmethod
    def preprocess_unit_string(unit_str: str) -> str:
        r"""Preprocess a unit string so that exponents use '**'.

        A run of Unicode superscript characters becomes a single exponent
        (``m²`` -> ``m**2``, ``s⁻¹`` -> ``s**-1``, ``m¹²`` -> ``m**12``) and
        every '^' is replaced with '**'.

        Args:
            unit_str (str): The unit string to preprocess.

        Returns:
            str: Preprocessed unit string with surrounding whitespace
                removed.
        """
        superscript_map = {
            "\u2070": "0",
            "\u00b9": "1",  # Superscript ¹
            "\u00b2": "2",  # Superscript ²
            "\u00b3": "3",  # Superscript ³
            "\u2074": "4",
            "\u2075": "5",
            "\u2076": "6",
            "\u2077": "7",
            "\u2078": "8",
            "\u2079": "9",
            "\u207a": "+",  # Superscript plus
            "\u207b": "-",  # Superscript minus
            # U+2071 is the superscript letter "i", not a digit. It used to
            # be treated as "1" here, so that mapping is kept for callers
            # that rely on it.
            "\u2071": "1",
        }

        def _to_exponent(match: Any) -> str:
            # One "**" per run, so multi-character exponents stay together
            return "**" + "".join(superscript_map[ch] for ch in match.group())

        superscript_run = "[" + "".join(superscript_map) + "]+"
        unit_str = re.sub(superscript_run, _to_exponent, unit_str)

        return unit_str.replace('^', '**').strip()

    @staticmethod
    def unit_is_none(unit_str: Optional[str]) -> bool:
        r"""Check if a unit string represents 'no unit' or is empty.

        The comparison ignores case and surrounding whitespace.

        Args:
            unit_str (Optional[str]): The unit string to check.

        Returns:
            bool: True if the unit is None or represents 'no unit'.
        """
        if unit_str is None:
            return True

        if isinstance(unit_str, str):
            unit_str = unit_str.strip().lower()

        return unit_str in ('none', '', 'dimensionless', 'unitless')

    @staticmethod
    def extract_value_and_unit(
        expr: Any,
    ) -> Tuple[Union[int, float, Any], Any]:
        r"""Extract numerical value and unit components from a SymPy
        expression.

        The expression is split into its multiplicative factors. Numeric
        factors form the value; every other factor is treated as part of
        the unit.

        Args:
            expr (Any): SymPy expression with units.

        Returns:
            Tuple[Union[int, float, Any], Any]: Numerical
                value and unit expression. Either part is ``1`` when the
                expression has no factor of that kind.
        """

        import sympy as sp

        numeric_terms: List[Any] = []
        unit_terms: List[Any] = []

        for term in sp.Mul.make_args(expr):
            if isinstance(term, (int, float, sp.Number)):
                numeric_terms.append(term)
            else:
                # Symbols, powers and any other expression count as unit
                unit_terms.append(term)

        value = sp.Mul(*numeric_terms) if numeric_terms else 1
        unit_expr = sp.Mul(*unit_terms) if unit_terms else 1

        return value, unit_expr

    @staticmethod
    def detect_unit_args(unit_expr: Any) -> List[Any]:
        r"""Extract the base units from a composite SymPy unit expression.

        Args:
            unit_expr (Any): SymPy expression representing a composite unit.

        Returns:
            List[Any]: List of SymPy base unit components. Powers are
                reduced to their base (``meter**2`` -> ``meter``).
        """

        import sympy as sp

        return [
            factor.base if getattr(factor, 'is_Pow', False) else factor
            for factor in sp.Mul.make_args(unit_expr)
        ]
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
class PhysicsSolutionComparator:
    r"""Class for comparing solutions and reference answers that contain a
    value and units.

    Args:
        solution (str): The output from running the solution code.
        reference_answer (str): The reference answer to compare against.
        float_tolerance (Optional[float], optional): The tolerance for
            floating point comparisons. When omitted, ``1e-2`` is used.
            (default: :obj:`None`)
    """

    def __init__(
        self,
        solution: str,
        reference_answer: str,
        float_tolerance: Optional[float] = None,
    ) -> None:
        self.solution: str = solution
        self.reference_answer: str = reference_answer
        self.tolerance = (
            float(float_tolerance) if float_tolerance is not None else 1e-2
        )
        self.unit_parser: UnitParser = UnitParser()

        # Initialize fields that will be set in _get_value_unit_pairs
        self.gt_value: Any = None
        self.gt_unit: str = ''
        self.sol_value: Any = None
        self.sol_unit: str = ''
        self.gt_unit_expr: Any = None
        self.sol_unit_expr: Any = None

    @staticmethod
    def _split_value_unit(s: str) -> Tuple[str, str]:
        r"""Split a string into value and unit components.

        Handles LaTeX-style units enclosed in dollar signs. Otherwise the
        last whitespace-separated token is taken as the unit. Leading and
        trailing whitespace is ignored, so a trailing space or newline does
        not hide the unit.

        Args:
            s (str): The input string.

        Returns:
            Tuple[str, str]: Tuple of (value, unit) as strings. The unit is
                an empty string when the input has a single token.
        """
        s = s.strip()
        if not s:
            return '', ''

        # A LaTeX unit at the end: whitespace, then $ ... $ with no inner $
        latex_unit_match = re.search(r'\s(\$[^$]*\$)$', s)
        if latex_unit_match:
            value = s[: latex_unit_match.start()].strip()
            return value, latex_unit_match.group(1)

        # No LaTeX unit: split off the last whitespace-separated token
        parts = s.rsplit(None, 1)
        if len(parts) == 1:
            return parts[0], ''
        return parts[0], parts[1]

    @staticmethod
    def _clean_answer(raw_answer: str) -> str:
        r"""Clean a raw answer string by removing LaTeX formatting.

        Args:
            raw_answer (str): The raw answer string potentially containing
                LaTeX formatting.

        Returns:
            str: The cleaned answer string without the surrounding dollar
                signs, with ``a \times 10^{b}`` rewritten as ``aeb`` and
                ``\mathrm{...}`` wrappers removed.
        """
        # Remove whitespace
        answer = raw_answer.strip()

        # Remove dollar signs that indicate LaTeX math mode
        if answer.startswith("$") and answer.endswith("$"):
            answer = answer[1:-1].strip()

        # Replace LaTeX scientific notation format (e.g., 1 \times 10^{14}
        # or 1.6 \times 10^{-19}); the exponent may carry a sign
        answer = re.sub(
            r'([\d.]+)\s*\\times\s*10\^\{([+-]?\d+)\}', r'\1e\2', answer
        )

        # Remove \mathrm commands
        answer = re.sub(r'\\mathrm\{([^}]*)\}', r'\1', answer)

        return answer

    @staticmethod
    def _parse_expression(expr: Any) -> Any:
        r"""Parse an expression into a SymPy expression.

        Args:
            expr (Any): Expression to parse, can be a string, number, or other
                type.

        Returns:
            Any: Parsed SymPy expression. If parsing fails, the string form
                of the input is returned instead.
        """

        import sympy as sp
        from sympy.parsing.sympy_parser import parse_expr

        # If already a number, return as is
        if isinstance(expr, (int, float)):
            return sp.Float(expr)

        # If not a string, try to convert to string first
        if not isinstance(expr, str):
            try:
                expr = str(expr)
            except Exception as e:
                logger.info(f"Failed to convert expression to string: {e}")
                return expr

        try:
            # Replace common symbols with their SymPy equivalents
            replacements = {
                sp.Symbol('pi'): sp.pi,
                sp.Symbol('e'): sp.E,
            }

            # NOTE(security): parse_expr evaluates the string with Python's
            # eval; the text parsed here may be the output of solution code.
            parsed_expr = parse_expr(expr)
            for old, new in replacements.items():
                parsed_expr = parsed_expr.subs(old, new)

            return parsed_expr
        except Exception as e:
            logger.info(f"Failed to parse expression '{expr}': {e}")
            return expr

    @staticmethod
    def _is_number(s: Any) -> bool:
        r"""Check if a value can be converted to a number.

        Args:
            s (Any): Value to check.

        Returns:
            bool: True if the value is an int or float, or a string that
                ``float()`` accepts.
        """
        if isinstance(s, (int, float)):
            return True

        if not isinstance(s, str):
            return False

        try:
            float(s)
            return True
        except (ValueError, TypeError):
            return False

    @staticmethod
    def _detect_tolerance(default_tolerance: float, value: str) -> float:
        r"""Derive a relative tolerance from the precision of a value string.

        The tolerance is one unit in the last written decimal place of the
        value, divided by the value itself and rounded to two decimals. It
        is then clamped to the range
        ``[default_tolerance, 10 * default_tolerance]``.

        Args:
            default_tolerance (float): The lower bound of the tolerance.
            value (str): The numeric value as text, optionally in
                scientific notation.

        Returns:
            float: The relative tolerance to use for the comparison.
        """
        value = value.strip()

        # Accept both "e" and "E", as the patterns below already do
        if 'e' in value.lower():
            match = re.match(r'([+-]?\d*\.?\d*)[eE]', value)
            significant_part = match.group(1) if match else value
            exponent_match = re.search(r'[eE]([+-]?\d+)', value)
            exponent = int(exponent_match.group(1)) if exponent_match else 0
        else:
            exponent = 0
            significant_part = value

        # Avoid dividing by zero when the value itself is zero
        factor = float(value) or 1.0

        if '.' in significant_part:
            decimal_places = len(significant_part.split('.')[1])
        else:
            decimal_places = 0

        rel_tol = float(
            abs(round(10 ** (exponent - decimal_places) / factor, 2))
        )

        # limit the tolerance to (default_tolerance, 10 * default_tolerance)
        rel_tol = min(rel_tol, 10 * default_tolerance)
        rel_tol = max(rel_tol, default_tolerance)

        logger.info(f"Detected tolerance: {rel_tol}")

        return rel_tol

    def _convert_units(self) -> None:
        r"""Convert the solution units to match gt units.

        On success ``sol_value`` and ``sol_unit_expr`` are replaced with the
        converted value and unit. Any failure is logged and leaves both
        attributes untouched.
        """
        import sympy as sp
        from sympy.physics import units

        try:
            sol_with_unit = self.sol_value * self.sol_unit_expr

            # Get scaling factor and base gt units
            scaling_factor, base_unit = self.unit_parser.detect_scaling_factor(
                self.gt_unit_expr
            )

            gt_unit_args = self.unit_parser.detect_unit_args(base_unit)

            if len(gt_unit_args) > 1:
                logger.info(
                    f"Ground truth unit is a composite unit "
                    f"with: {gt_unit_args}"
                )

            # Perform the unit conversion
            converted_sol_expr = units.convert_to(sol_with_unit, gt_unit_args)
            logger.info(f'Converted solution expr: {converted_sol_expr}')

            # Work on locals so that a failure below cannot leave the
            # instance with a half-converted value
            new_value, new_unit_expr = (
                self.unit_parser.extract_value_and_unit(converted_sol_expr)
            )

            if not isinstance(new_value, (int, float, sp.Number)):
                raise ValueError(
                    f"Failed to extract value from converted "
                    f"value: {new_value}"
                )

            new_value = float(new_value)

            # Apply scaling factor if needed
            if scaling_factor != 1:
                logger.info(
                    f"Applying scaling factor {scaling_factor} for "
                    f"ground truth units"
                )
                new_value /= scaling_factor
                new_unit_expr *= scaling_factor

            self.sol_value, self.sol_unit_expr = new_value, new_unit_expr

            logger.info(f'Converted solution value: {self.sol_value}')
            logger.info(f'Converted solution unit: {self.sol_unit_expr}')

        except Exception as e:
            logger.error(f'Unit conversion failed: {e}')

    @staticmethod
    def verify_unit(sol_unit_expr: Any, gt_unit_expr: Any) -> bool:
        r"""Check whether two parsed units are the same.

        Args:
            sol_unit_expr (Any): Unit of the solution, as a SymPy
                expression or a string.
            gt_unit_expr (Any): Unit of the reference answer, as a SymPy
                expression or a string.

        Returns:
            bool: True if both units are SymPy expressions whose difference
                simplifies to zero, or both are strings that are equal
                after stripping. False for any other combination or when
                the comparison raises.
        """
        try:
            import sympy as sp

            logger.info(
                f"Comparing response unit ({sol_unit_expr}) with answer "
                f"unit ({gt_unit_expr})"
            )
            # Case 1: Both are SymPy expressions (Quantity objects are
            # expressions too) - try symbolic comparison
            if isinstance(sol_unit_expr, sp.Expr) and isinstance(
                gt_unit_expr, sp.Expr
            ):
                try:
                    # Attempt to simplify the difference, handles compatible
                    # symbolic units
                    diff = sp.simplify(sol_unit_expr - gt_unit_expr)
                    return bool(diff == 0)
                except TypeError:
                    # If subtraction fails (e.g., incompatible symbolic
                    # units), compare directly. ``equals`` may return None
                    # when it cannot decide, which counts as no match.
                    return bool(
                        sp.simplify(sol_unit_expr).equals(
                            sp.simplify(gt_unit_expr)
                        )
                    )
            # Case 2: Both are strings - compare directly
            if isinstance(sol_unit_expr, str) and isinstance(
                gt_unit_expr, str
            ):
                return sol_unit_expr.strip() == gt_unit_expr.strip()
            # Case 3: Mixed or other types - cannot compare
            logger.warning(
                f"Cannot compare units of incompatible "
                f"types: {type(sol_unit_expr)} and {type(gt_unit_expr)}"
            )
            return False
        except Exception as e:
            logger.error("Failed to compare units: %s", e)
            return False

    def compare_solution_to_reference(self) -> "VerificationResult":
        r"""Compare the solution output to the reference answer.

        Returns:
            VerificationResult: ``SUCCESS`` when both value and unit match,
                ``FAILURE`` with a message naming the mismatch otherwise,
                and ``ERROR`` when the comparison itself raises.
        """
        try:
            self._get_value_unit_pairs()
            logger.info(
                f"Solution value: {self.sol_value}; Ground truth "
                f"value: {self.gt_value}"
            )
            logger.info(
                f"Solution unit: {self.sol_unit_expr}; Ground truth "
                f"unit: {self.gt_unit_expr}"
            )

            if self._is_number(self.gt_value):
                # Ensure values are strings before calling string methods
                if isinstance(self.sol_value, str) and isinstance(
                    self.gt_value, str
                ):
                    self.sol_value = self.sol_value.lower().strip()
                    self.gt_value = self.gt_value.lower().strip()
                result_match = self._compare_numeric_values()
            else:
                result_match = self._compare_symbolic_values()

            if self.unit_parser.unit_is_none(self.gt_unit):
                # If the answer is dimensionless, the response should also be
                # dimensionless
                unit_match = self.unit_parser.unit_is_none(self.sol_unit)
            elif self.sol_unit_expr is None or self.gt_unit_expr is None:
                unit_match = False
            else:
                unit_match = self.verify_unit(
                    self.sol_unit_expr, self.gt_unit_expr
                )

            result_text = f'{self.sol_value} {self.sol_unit_expr}'

            if result_match and unit_match:
                return VerificationResult(
                    status=VerificationOutcome.SUCCESS,
                    result=result_text,
                )

            if result_match:
                error_message = "Units do not match."
            elif unit_match:
                error_message = "Values do not match."
            else:
                error_message = "Both values and units do not match."

            return VerificationResult(
                status=VerificationOutcome.FAILURE,
                result=result_text,
                error_message=error_message,
            )
        except Exception as e:
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result=f'{self.solution}',
                error_message=f"Comparison failed: {e}",
            )

    def _get_value_unit_pairs(self) -> None:
        r"""Split the reference answer and the solution into value and unit.

        Fills ``gt_value``, ``gt_unit``, ``sol_value``, ``sol_unit`` and the
        parsed ``gt_unit_expr`` / ``sol_unit_expr``. When the reference
        answer has no unit, the whole solution text is used as the value.
        """
        self.gt_value, self.gt_unit = self._split_value_unit(
            self.reference_answer
        )

        if self.gt_unit == '':
            self.sol_value, self.sol_unit = self.solution, ''
        else:
            self.sol_value, self.sol_unit = self._split_value_unit(
                self.solution
            )

        self.gt_value = self._clean_answer(self.gt_value)

        if self.unit_parser.unit_is_none(
            self.gt_unit
        ) and self.unit_parser.unit_is_none(self.sol_unit):
            self.gt_unit_expr, self.sol_unit_expr = None, None
        else:
            self.gt_unit_expr = self.unit_parser.parse_unit(self.gt_unit)
            self.sol_unit_expr = self.unit_parser.parse_unit(self.sol_unit)

    def _compare_numeric_values(self) -> bool:
        r"""Compare numerical values, with unit conversion if needed.

        Returns:
            bool: True if the solution value is close to the reference
                value within the detected relative tolerance.

        Raises:
            ValueError: If the solution value is not a number and cannot be
                parsed and evaluated.
        """
        rel_tol = self._detect_tolerance(self.tolerance, self.gt_value)
        self.gt_value = float(self.gt_value)

        if self._is_number(self.sol_value):
            self.sol_value = float(self.sol_value)
        else:
            logger.info(
                f'Convert output expr {self.sol_value} into numerical.'
            )
            try:
                sol_expr = self._parse_expression(self.sol_value)
                self.sol_value = sol_expr.evalf()
            except Exception as e:
                raise ValueError(
                    f"Failed to evaluate output {self.sol_value}: {e}"
                ) from e

        if (
            self.gt_unit_expr is not None
            and self.sol_unit_expr is not None
            and not self.verify_unit(self.sol_unit_expr, self.gt_unit_expr)
        ):
            logger.info(
                'Units do not match directly. Attempting conversion...'
            )
            self._convert_units()

        # Compare numerical values
        logger.info(f'Solution value: {self.sol_value}')
        logger.info(f'Ground truth value: {self.gt_value}')

        return math.isclose(self.sol_value, self.gt_value, rel_tol=rel_tol)

    def _compare_symbolic_values(self) -> bool:
        r"""Compare symbolic expressions for equivalence.

        Both values are parsed and simplified. If both evaluate to plain
        numbers they are compared with the configured tolerance; otherwise
        they match when they are structurally equal or their difference
        simplifies to zero.

        Returns:
            bool: True if the two expressions are considered equivalent.

        Raises:
            ValueError: If either value cannot be parsed, or if exactly
                one of the two expressions is an equation.
        """

        import sympy as sp

        gt_expr = self._parse_expression(self.gt_value)
        sol_expr = self._parse_expression(self.sol_value)

        logger.info(f'Solution expression: {sol_expr}')
        logger.info(f'Ground truth expression: {gt_expr}')

        # _parse_expression hands back the raw text when parsing fails
        for label, parsed in (('solution', sol_expr), ('reference', gt_expr)):
            if not hasattr(parsed, 'free_symbols'):
                raise ValueError(
                    f"Failed to parse {label} expression: {parsed}"
                )

        sol_symbols = sol_expr.free_symbols
        gt_symbols = gt_expr.free_symbols

        if sol_symbols != gt_symbols:
            # Align reference symbols with same-named solution symbols
            sol_by_name = {str(sym): sym for sym in sol_symbols}
            symbol_mapping = {
                gt_sym: sol_by_name[str(gt_sym)]
                for gt_sym in gt_symbols
                if str(gt_sym) in sol_by_name
            }
            gt_expr = gt_expr.subs(symbol_mapping)

        # Handle Equalities
        sol_is_eq = isinstance(sol_expr, sp.Eq)
        gt_is_eq = isinstance(gt_expr, sp.Eq)
        if sol_is_eq and gt_is_eq:
            sol_expr = sp.simplify(sol_expr.rhs)
            gt_expr = sp.simplify(gt_expr.rhs)
        elif not sol_is_eq and not gt_is_eq:
            sol_expr = sp.simplify(sol_expr)
            gt_expr = sp.simplify(gt_expr)
        else:
            raise ValueError(
                f"Cannot compare an equation with a non-equation "
                f"directly: {sol_expr}, {gt_expr}"
            )

        try:
            sol_number = float(
                sol_expr
                if isinstance(sol_expr, (int, float, sp.Number))
                else sol_expr.evalf()
            )
            gt_number = float(
                gt_expr
                if isinstance(gt_expr, (int, float, sp.Number))
                else gt_expr.evalf()
            )
        except Exception:
            # At least one side is not a plain number: compare symbolically
            if sol_expr == gt_expr:
                return True
            try:
                return bool(sp.simplify(sol_expr - gt_expr) == 0)
            except Exception:
                return False

        # Store the numbers only once both conversions have succeeded
        self.sol_value, self.gt_value = sol_number, gt_number
        return math.isclose(
            self.sol_value, self.gt_value, rel_tol=self.tolerance
        )
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
class PhysicsVerifier(PythonVerifier):
    r"""A PythonVerifier that additionally compares and converts units when
    checking the output of solution code against a reference answer.

    Args:
        extractor (Optional[BaseExtractor]): The extractor to use for
            extracting code from messages. (default: :obj:`None`)
        timeout (Optional[float]): The timeout for code execution in seconds.
            (default: :obj:`30.0`)
        required_packages (Optional[List[str]]): The required packages for code
            execution. (default: :obj:`None`)
        float_tolerance (Optional[float]): The relative tolerance used to
            compare numerical values. (default: :obj:`None`)
        **kwargs: Additional keyword arguments to pass to the parent class.
    """

    def __init__(
        self,
        extractor: Optional[BaseExtractor] = None,
        timeout: Optional[float] = 30.0,
        required_packages: Optional[List[str]] = None,
        float_tolerance: Optional[float] = None,
        **kwargs,
    ) -> None:
        parent_options = dict(
            extractor=extractor,
            timeout=timeout,
            required_packages=required_packages,
        )
        super().__init__(**parent_options, **kwargs)
        # Handed to PhysicsSolutionComparator for every verification
        self.tolerance = float_tolerance

    async def _verify_implementation(
        self, solution: str, reference_answer: Optional[str]
    ) -> VerificationResult:
        r"""Run the solution code and compare its output with the reference.

        Args:
            solution (str): The code block to execute; it should already
                end with a ``print(...)`` of the answer.
            reference_answer (Optional[str]): The expected answer.

        Returns:
            VerificationResult: The comparator's result, or an ``ERROR`` /
                ``TIMEOUT`` result when the code cannot be run, fails,
                times out, or no reference answer was given.
        """

        def _report(
            outcome: Any, message: str, output: str = ""
        ) -> VerificationResult:
            # Every non-comparison result has the same three fields
            return VerificationResult(
                status=outcome,
                result=output,
                error_message=message,
            )

        # Check for virtual environment setup
        if not self.venv_path:
            return _report(
                VerificationOutcome.ERROR,
                "Virtual environment is not set up.",
            )

        interpreter_name = "python.exe" if os.name == 'nt' else "python"
        venv_python = os.path.join(
            self.venv_path, self.bin_dir, interpreter_name
        )
        if not os.path.exists(venv_python):
            return _report(
                VerificationOutcome.ERROR,
                "Python binary not found in virtual environment",
            )

        try:
            stdout_text, stderr_text, exit_code = await self._run_code_block(
                solution, venv_python
            )
            if exit_code != 0:
                return _report(
                    VerificationOutcome.ERROR,
                    f"Solution code error:\n{stderr_text}",
                    stdout_text,
                )

            logger.info(f"Solution: {stdout_text}")

            if reference_answer is None:
                return _report(
                    VerificationOutcome.ERROR,
                    "Reference answer is required for physics "
                    "verification.",
                )

            return PhysicsSolutionComparator(
                stdout_text, reference_answer, self.tolerance
            ).compare_solution_to_reference()

        except asyncio.TimeoutError:
            return _report(
                VerificationOutcome.TIMEOUT, "Execution timed out."
            )
        except Exception as e:
            return _report(
                VerificationOutcome.ERROR, f"Unexpected error: {e}"
            )
|