camel-ai 0.2.49__py3-none-any.whl → 0.2.50__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

@@ -0,0 +1,881 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ import asyncio
16
+ import math
17
+ import os
18
+ import re
19
+ from typing import Any, Dict, List, Optional, Tuple, Union
20
+
21
+ from camel.extractors.base import BaseExtractor
22
+ from camel.logger import get_logger
23
+
24
+ from .models import VerificationOutcome, VerificationResult
25
+ from .python_verifier import PythonVerifier
26
+
27
+ logger = get_logger(__name__)
28
+
29
+
30
class UnitParser:
    r"""Class for handling unit parsing and manipulation operations.

    On construction the parser builds ``allowed_units``, a lookup table from
    unit names to SymPy unit objects. The table contains every ``Quantity``
    exposed by ``sympy.physics.units``, a few extra aliases, and SI-prefixed
    variants of all of those names.
    """

    # Unicode superscript characters and their ASCII equivalents. The real
    # superscript one is U+00B9; U+2071 (superscript latin small letter i)
    # is kept as an alias for "1" because earlier versions accepted it.
    _SUPERSCRIPT_TABLE = str.maketrans(
        {
            "\u2070": "0",
            "\u00b9": "1",
            "\u2071": "1",
            "\u00b2": "2",
            "\u00b3": "3",
            "\u2074": "4",
            "\u2075": "5",
            "\u2076": "6",
            "\u2077": "7",
            "\u2078": "8",
            "\u2079": "9",
            "\u207a": "+",
            "\u207b": "-",
        }
    )
    # An optional superscript sign followed by one or more superscript
    # digits, so that a whole exponent is converted as a single unit.
    _SUPERSCRIPT_RUN = re.compile(
        "[\u207a\u207b]?[\u2070\u00b9\u2071\u00b2\u00b3\u2074-\u2079]+"
    )

    def __init__(self):
        from sympy.physics import units

        # Aliases that are not attribute names of ``sympy.physics.units``.
        extra_allowed_units = {
            'hrs': units.hour,
            'min': units.minute,
            'Joule': units.joule,
            'Joules': units.joule,
            'circ': units.degree,
            "Omega": units.ohm,
            # NOTE(review): confirm ``units.Unit`` exists in every supported
            # SymPy version.
            '%': units.Unit('percent'),
        }

        self.allowed_units = self._load_sympy_units()
        self.allowed_units.update(extra_allowed_units)

        # Add SI prefixed units
        self._add_si_prefixes()

    @staticmethod
    def _load_sympy_units() -> Dict[str, Any]:
        r"""Load all available units from sympy.physics.units.

        Returns:
            Dict[str, Any]: Dictionary mapping unit names to their
                corresponding sympy Quantity objects.
        """
        from sympy.physics import units

        sympy_units = {}
        for attr_name in dir(units):
            unit_obj = getattr(units, attr_name)
            if isinstance(unit_obj, units.Quantity):
                sympy_units[attr_name] = unit_obj
        return sympy_units

    def _add_si_prefixes(self):
        r"""Add SI prefixed units (like km, MHz, etc.) to the allowed units.

        Every name currently in ``allowed_units`` is combined with every
        prefix in SymPy's ``PREFIXES``. A prefixed name that collides with an
        already registered name is skipped, so existing entries are never
        overwritten.
        """
        from sympy.physics.units.prefixes import PREFIXES

        # Snapshot taken once: the loop adds entries to ``allowed_units``,
        # and only the names present before prefixing count as conflicts.
        base_units = dict(self.allowed_units)

        for prefix, prefix_obj in PREFIXES.items():
            for unit_name, base_unit in base_units.items():
                prefixed_unit_name = f"{prefix}{unit_name}"  # "MJ", "kN"
                if prefixed_unit_name in base_units:
                    continue
                self.allowed_units[prefixed_unit_name] = (
                    prefix_obj.scale_factor * base_unit
                )

    def parse_unit(self, unit_str: str) -> Optional[Any]:
        r"""Parse a unit string into a SymPy expression using the appropriate
        method.

        Strings containing ``$`` or a backslash are handed to
        :meth:`parse_unit_with_latex`. Other strings are normalised with
        :meth:`preprocess_unit_string`, looked up directly in
        ``allowed_units`` and, failing that, evaluated with SymPy's
        ``parse_expr`` using ``allowed_units`` as the local namespace.

        Args:
            unit_str (str): The unit string to parse.

        Returns:
            Optional[Any]: SymPy expression representing the unit, or None
                if parsing fails or the unit is dimensionless. The LaTeX
                path may return the cleaned string when it cannot be parsed.
        """

        import sympy as sp
        from sympy.parsing.sympy_parser import parse_expr

        if not unit_str or self.unit_is_none(unit_str):
            return None

        if "$" in unit_str or "\\" in unit_str:
            # Likely a LaTeX formatted string
            return self.parse_unit_with_latex(unit_str)

        # Standard unit string
        processed_str = self.preprocess_unit_string(unit_str)

        # Registered names that are not valid Python expressions (such as
        # '%') or that are Python keywords can never be resolved by
        # ``parse_expr``; look the whole string up in the table first.
        known_unit = self.allowed_units.get(processed_str)
        if known_unit is not None:
            return sp.simplify(known_unit)

        try:
            # NOTE: ``parse_expr`` evaluates the string with ``eval``; the
            # unit text should not come from an untrusted source.
            expr = parse_expr(
                processed_str, local_dict=self.allowed_units, evaluate=True
            )
            return sp.simplify(expr)
        except Exception as e:
            logger.info(
                f"Failed to parse unit '{unit_str}' "
                f"(processed as '{processed_str}'): {e}"
            )
            return None

    def parse_unit_with_latex(self, unit_str: str) -> Any:
        r"""Parse a unit string using SymPy's LaTeX parser.

        Args:
            unit_str (str): The unit string in LaTeX format.

        Returns:
            Any: SymPy expression representing the unit, or the
                cleaned string if parsing fails.
        """

        from sympy import simplify
        from sympy.parsing.latex import parse_latex

        # Clean the LaTeX string
        unit_str = unit_str.strip().lstrip("$").rstrip("$").lstrip("^")
        unit_str = re.sub(r'\\mathrm\{([^}]*)\}', r'{\\\1}', unit_str)
        unit_str = re.sub(r'\\text\{(.*?)\}', r'\1', unit_str)
        unit_str = unit_str.replace('~', '')

        try:
            expr = parse_latex(unit_str)
            logger.info(f"Parsed LaTeX unit: {expr}.")
        except Exception as e:
            logger.warning(f"Failed to parse LaTeX unit '{unit_str}': {e}")
            return unit_str

        # Substitute allowed unit symbols. Only symbols that actually occur
        # in the expression can be replaced, so build the mapping from them
        # instead of issuing one ``subs`` call per registered unit.
        replacements = {
            symbol: self.allowed_units[symbol.name]
            for symbol in expr.free_symbols
            if symbol.name in self.allowed_units
        }
        if replacements:
            expr = expr.subs(replacements)

        simplified_expr = simplify(expr)
        logger.info(f"Simplified LaTeX unit: {simplified_expr}")
        return simplified_expr

    def detect_scaling_factor(
        self, unit_expr: Any
    ) -> Tuple[Union[int, float, Any], Any]:
        r"""Detect a scaling factor in the unit expression.

        Args:
            unit_expr (Any): The unit expression.

        Returns:
            Tuple[Union[int, float, Any], Any]: Tuple of scale
                factor and base unit. When no numeric factor can be
                isolated, ``(1, unit_expr)`` is returned.
        """

        import sympy as sp

        value, base_unit = self.extract_value_and_unit(unit_expr)

        if isinstance(value, (int, float, sp.Number)):
            return value, base_unit
        return 1, unit_expr

    @staticmethod
    def preprocess_unit_string(unit_str: str) -> str:
        r"""Preprocess a unit string so exponents use Python's ``**``.

        A run of Unicode superscript characters (optionally starting with a
        superscript sign) is converted to ``**`` followed by its ASCII form,
        so ``m²`` becomes ``m**2`` and ``s⁻¹`` becomes ``s**-1``. Every
        ``^`` is replaced by ``**`` and surrounding whitespace is removed.

        Args:
            unit_str (str): The unit string to preprocess.

        Returns:
            str: Preprocessed unit string.
        """

        def _to_power(match: Any) -> str:
            digits = match.group(0).translate(UnitParser._SUPERSCRIPT_TABLE)
            return "**" + digits

        unit_str = UnitParser._SUPERSCRIPT_RUN.sub(_to_power, unit_str)
        return unit_str.replace('^', '**').strip()

    @staticmethod
    def unit_is_none(unit_str: Optional[str]) -> bool:
        r"""Check if a unit string represents 'no unit' or is empty.

        Args:
            unit_str (Optional[str]): The unit string to check.

        Returns:
            bool: True if the unit is None or, ignoring case and
                surrounding whitespace, one of ``''``, ``'none'``,
                ``'dimensionless'`` or ``'unitless'``.
        """
        if unit_str is None:
            return True

        if isinstance(unit_str, str):
            unit_str = unit_str.strip().lower()

        return unit_str in ['none', '', 'dimensionless', 'unitless']

    @staticmethod
    def extract_value_and_unit(
        expr: Any,
    ) -> Tuple[Union[int, float, Any], Any]:
        r"""Extract numerical value and unit components from a SymPy
        expression.

        The expression is split into its multiplicative factors. Numeric
        factors form the value; every other factor is treated as part of the
        unit.

        Args:
            expr (Any): SymPy expression with units.

        Returns:
            Tuple[Union[int, float, Any], Any]: Numerical
                value and unit expression. Either part is ``1`` when the
                expression has no factor of that kind.
        """

        import sympy as sp

        numeric_terms: List[Any] = []
        unit_terms: List[Any] = []

        for term in sp.Mul.make_args(expr):
            if isinstance(term, (int, float, sp.Number)):
                numeric_terms.append(term)
            else:
                # Symbols, powers and any other factor belong to the unit.
                unit_terms.append(term)

        value = sp.Mul(*numeric_terms) if numeric_terms else 1
        unit_expr = sp.Mul(*unit_terms) if unit_terms else 1

        return value, unit_expr

    @staticmethod
    def detect_unit_args(unit_expr: Any) -> List[Any]:
        r"""Extract the base units from a composite SymPy unit expression.

        Args:
            unit_expr (Any): SymPy expression representing a composite unit.

        Returns:
            List[Any]: List of SymPy base unit components; a power
                contributes its base, any other factor contributes itself.
        """

        import sympy as sp

        return [
            factor.base if getattr(factor, 'is_Pow', False) else factor
            for factor in sp.Mul.make_args(unit_expr)
        ]
298
+
299
+
300
class PhysicsSolutionComparator:
    r"""Class for comparing solutions and reference answers that contain a
    value and units.

    Args:
        solution (str): The output from running the solution code.
        reference_answer (str): The reference answer to compare against.
        float_tolerance (Optional[float], optional): The tolerance for
            floating point comparisons. When omitted, ``1e-2`` is used.
            (default: :obj:`None`)
    """

    def __init__(
        self,
        solution: str,
        reference_answer: str,
        float_tolerance: Optional[float] = None,
    ) -> None:
        self.solution: str = solution
        self.reference_answer: str = reference_answer
        self.tolerance = (
            float(float_tolerance) if float_tolerance is not None else 1e-2
        )
        self.unit_parser: UnitParser = UnitParser()

        # Initialize fields that will be set in _get_value_unit_pairs
        self.gt_value: Any = None
        self.gt_unit: str = ''
        self.sol_value: Any = None
        self.sol_unit: str = ''
        self.gt_unit_expr: Any = None
        self.sol_unit_expr: Any = None

    @staticmethod
    def _split_value_unit(s: str) -> Tuple[str, str]:
        r"""Split a string into value and unit components.
        Handles LaTeX-style units enclosed in dollar signs.

        The unit is the last space-separated token, or a trailing
        ``$...$`` group. A string that ends with a space (or is empty) is
        treated as a value without a unit.

        Args:
            s (str): The input string.

        Returns:
            Tuple[str, str]: Tuple of (value, unit) as strings.
        """
        parts = s.split(' ')

        # A trailing space means the unit slot is empty: keep the whole
        # (stripped) string as the value.
        if parts[-1] == '':
            return s.strip(), ''

        # A LaTeX unit at the end: whitespace, then `$`, anything up to the
        # closing `$`.
        latex_unit_match = re.search(r'\s(\$[^$]*\$)$', s)
        if latex_unit_match:
            value = s[: latex_unit_match.start()].strip()
            return value, latex_unit_match.group(1)

        if len(parts) == 1:
            return parts[0], ''
        return ' '.join(parts[:-1]), parts[-1]

    @staticmethod
    def _clean_answer(raw_answer: str) -> str:
        r"""Clean a raw answer string by removing LaTeX formatting.

        Args:
            raw_answer (str): The raw answer string potentially containing
                LaTeX formatting.

        Returns:
            str: The cleaned answer string: surrounding ``$`` removed,
                ``a \times 10^{b}`` rewritten as ``aeb`` and
                ``\mathrm{...}`` unwrapped.
        """
        answer = raw_answer.strip()

        # Remove dollar signs that indicate LaTeX math mode
        if answer.startswith("$") and answer.endswith("$"):
            answer = answer[1:-1].strip()

        # Replace LaTeX scientific notation (e.g., 1 \times 10^{14} or
        # 1.6 \times 10^{-19}); the exponent may carry a sign.
        answer = re.sub(
            r'([\d.]+)\s*\\times\s*10\^\{([+-]?\d+)\}', r'\1e\2', answer
        )

        # Remove \mathrm commands
        answer = re.sub(r'\\mathrm\{([^}]*)\}', r'\1', answer)

        return answer

    @staticmethod
    def _parse_expression(expr: Any) -> Any:
        r"""Parse an expression into a SymPy expression.

        Args:
            expr (Any): Expression to parse, can be a string, number, or other
                type.

        Returns:
            Any: Parsed SymPy expression, with the symbols ``pi`` and ``e``
                replaced by the corresponding constants. If parsing fails,
                the (string) input is returned unchanged.
        """

        import sympy as sp
        from sympy.parsing.sympy_parser import parse_expr

        # Plain Python numbers are wrapped directly.
        if isinstance(expr, (int, float)):
            return sp.Float(expr)

        # If not a string, try to convert to string first
        if not isinstance(expr, str):
            try:
                expr = str(expr)
            except Exception as e:
                logger.info(f"Failed to convert expression to string: {e}")
                return expr

        try:
            # NOTE: ``parse_expr`` evaluates the string with ``eval``; it
            # should not be fed text from an untrusted source.
            parsed_expr = parse_expr(expr)

            # Replace common symbols with their SymPy equivalents
            replacements = {
                sp.Symbol('pi'): sp.pi,
                sp.Symbol('e'): sp.E,
            }
            for old, new in replacements.items():
                parsed_expr = parsed_expr.subs(old, new)

            return parsed_expr
        except Exception as e:
            logger.info(f"Failed to parse expression '{expr}': {e}")
            return expr

    @staticmethod
    def _is_number(s: Any) -> bool:
        r"""Check if a value can be converted to a number.

        Args:
            s (Any): Value to check.

        Returns:
            bool: True for ints and floats, and for strings accepted by
                ``float``; False otherwise.
        """
        if isinstance(s, (int, float)):
            return True

        if not isinstance(s, str):
            return False

        try:
            float(s)
        except (ValueError, TypeError):
            return False
        return True

    @staticmethod
    def _detect_tolerance(default_tolerance: float, value: str) -> float:
        r"""Derive a relative tolerance from the precision of a number
        written as text.

        The tolerance is one unit in the last written digit of the mantissa
        divided by the value, rounded to two decimals and clamped to the
        range ``[default_tolerance, 10 * default_tolerance]``.

        Args:
            default_tolerance (float): Lower bound of the result; ten times
                this value is the upper bound.
            value (str): Textual form of the number; the exponent marker
                may be ``e`` or ``E``.

        Returns:
            float: The detected relative tolerance.

        Raises:
            ValueError: If ``value`` is not accepted by ``float``.
        """
        numeric_value = float(value)
        # Avoid dividing by zero for a reference value of exactly 0.
        factor = 1.0 if numeric_value == 0 else numeric_value

        # Split "<mantissa>e<exponent>" case-insensitively; ``float`` above
        # already validated the overall format.
        mantissa, _, exponent_text = value.strip().lower().partition('e')
        exponent = int(exponent_text) if exponent_text else 0
        decimal_places = len(mantissa.partition('.')[2])

        rel_tol = float(
            abs(round(10 ** (exponent - decimal_places) / factor, 2))
        )

        # limit the tolerance to the range (default_tolerance, 10 *
        # default_tolerance)
        rel_tol = min(rel_tol, 10 * default_tolerance)
        rel_tol = max(rel_tol, default_tolerance)

        logger.info(f"Detected tolerance: {rel_tol}")

        return rel_tol

    def _convert_units(self) -> None:
        r"""Convert the solution value and unit to the ground truth unit.

        On success ``sol_value`` and ``sol_unit_expr`` are replaced by the
        converted quantity. Any failure is logged and leaves both
        attributes untouched.
        """
        import sympy as sp
        from sympy.physics import units

        try:
            sol_with_unit = self.sol_value * self.sol_unit_expr

            # Get scaling factor and base gt units
            scaling_factor, base_unit = self.unit_parser.detect_scaling_factor(
                self.gt_unit_expr
            )

            gt_unit_args = self.unit_parser.detect_unit_args(base_unit)

            if len(gt_unit_args) > 1:
                logger.info(
                    f"Ground truth unit is a composite unit "
                    f"with: {gt_unit_args}"
                )

            # Perform the unit conversion
            converted_sol_expr = units.convert_to(sol_with_unit, gt_unit_args)
            logger.info(f'Converted solution expr: {converted_sol_expr}')

            # Work on locals so a failure below cannot leave the instance
            # with a half-converted value.
            converted_value, converted_unit = (
                self.unit_parser.extract_value_and_unit(converted_sol_expr)
            )

            if not isinstance(converted_value, (int, float, sp.Number)):
                raise ValueError(
                    f"Failed to extract value from converted "
                    f"value: {converted_value}"
                )

            converted_value = float(converted_value)

            # Apply scaling factor if needed
            if scaling_factor != 1:
                logger.info(
                    f"Applying scaling factor {scaling_factor} for "
                    f"ground truth units"
                )
                converted_value /= scaling_factor
                converted_unit *= scaling_factor

            self.sol_value = converted_value
            self.sol_unit_expr = converted_unit

            logger.info(f'Converted solution value: {self.sol_value}')
            logger.info(f'Converted solution unit: {self.sol_unit_expr}')

        except Exception as e:
            # Best effort: the caller goes on with the unconverted values.
            logger.error(f'Unit conversion failed: {e}')

    @staticmethod
    def verify_unit(sol_unit_expr: Any, gt_unit_expr: Any) -> bool:
        r"""Check whether two parsed units are equal.

        Args:
            sol_unit_expr (Any): Unit of the solution; a SymPy expression
                or a string.
            gt_unit_expr (Any): Unit of the reference answer; a SymPy
                expression or a string.

        Returns:
            bool: True if both are SymPy expressions whose difference
                simplifies to zero, or both are strings that are equal
                after stripping. False for any other combination or if the
                comparison raises.
        """
        try:
            import sympy as sp

            logger.info(
                f"Comparing response unit ({sol_unit_expr}) with answer "
                f"unit ({gt_unit_expr})"
            )

            # SymPy expressions (this includes plain Quantity objects):
            # compare symbolically.
            if isinstance(sol_unit_expr, sp.Expr) and isinstance(
                gt_unit_expr, sp.Expr
            ):
                try:
                    diff = sp.simplify(sol_unit_expr - gt_unit_expr)
                    return diff == 0
                except TypeError:
                    # If subtraction fails (e.g., incompatible symbolic
                    # units), compare directly
                    return bool(
                        sp.simplify(sol_unit_expr).equals(
                            sp.simplify(gt_unit_expr)
                        )
                    )

            # Both are strings - compare directly
            if isinstance(sol_unit_expr, str) and isinstance(
                gt_unit_expr, str
            ):
                return sol_unit_expr.strip() == gt_unit_expr.strip()

            # Mixed or other types - cannot compare
            logger.warning(
                f"Cannot compare units of incompatible "
                f"types: {type(sol_unit_expr)} and {type(gt_unit_expr)}"
            )
            return False
        except Exception as e:
            logger.error("Failed to compare units: %s", e)
            return False

    def compare_solution_to_reference(self) -> "VerificationResult":
        r"""Compare the solution output to the reference answer.

        Values are compared numerically when the reference value is a
        number and symbolically otherwise; units are compared separately.

        Returns:
            VerificationResult: ``SUCCESS`` when both value and unit match,
                ``FAILURE`` with a message naming what differs otherwise,
                and ``ERROR`` if the comparison itself raises.
        """
        try:
            self._get_value_unit_pairs()
            logger.info(
                f"Solution value: {self.sol_value}; Ground truth "
                f"value: {self.gt_value}"
            )
            logger.info(
                f"Solution unit: {self.sol_unit_expr}; Ground truth "
                f"unit: {self.gt_unit_expr}"
            )

            if self._is_number(self.gt_value):
                # Ensure values are strings before calling string methods
                if isinstance(self.sol_value, str) and isinstance(
                    self.gt_value, str
                ):
                    self.sol_value = self.sol_value.lower().strip()
                    self.gt_value = self.gt_value.lower().strip()
                result_match = self._compare_numeric_values()
            else:
                result_match = self._compare_symbolic_values()

            if self.unit_parser.unit_is_none(self.gt_unit):
                # If the answer is dimensionless, the response should also be
                # dimensionless
                unit_match = self.unit_parser.unit_is_none(self.sol_unit)
            elif self.sol_unit_expr is None or self.gt_unit_expr is None:
                unit_match = False
            else:
                unit_match = self.verify_unit(
                    self.sol_unit_expr, self.gt_unit_expr
                )

            result_text = f'{self.sol_value} {self.sol_unit_expr}'

            if result_match and unit_match:
                return VerificationResult(
                    status=VerificationOutcome.SUCCESS,
                    result=result_text,
                )

            if result_match:
                error_message = "Units do not match."
            elif unit_match:
                error_message = "Values do not match."
            else:
                error_message = "Both values and units do not match."

            return VerificationResult(
                status=VerificationOutcome.FAILURE,
                result=result_text,
                error_message=error_message,
            )
        except Exception as e:
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result=f'{self.solution}',
                error_message=f"Comparison failed: {e}",
            )

    def _get_value_unit_pairs(self) -> None:
        r"""Split reference and solution into value/unit parts and parse
        the units.

        Sets ``gt_value``, ``gt_unit``, ``sol_value``, ``sol_unit``,
        ``gt_unit_expr`` and ``sol_unit_expr``. A side whose unit text means
        "no unit" gets a unit expression of None.
        """
        self.gt_value, self.gt_unit = self._split_value_unit(
            self.reference_answer
        )

        if self.gt_unit == '':
            # The reference has no unit, so the whole solution is the value.
            self.sol_value, self.sol_unit = self.solution, ''
        else:
            self.sol_value, self.sol_unit = self._split_value_unit(
                self.solution
            )

        self.gt_value = self._clean_answer(self.gt_value)

        # Decide per side: text such as "none" or "unitless" must not be
        # handed to the parser, where it would turn into a symbol.
        parser = self.unit_parser
        self.gt_unit_expr = (
            None
            if parser.unit_is_none(self.gt_unit)
            else parser.parse_unit(self.gt_unit)
        )
        self.sol_unit_expr = (
            None
            if parser.unit_is_none(self.sol_unit)
            else parser.parse_unit(self.sol_unit)
        )

    def _compare_numeric_values(self) -> bool:
        r"""Compare numerical values, with unit conversion if needed.

        Returns:
            bool: True if the solution value is close to the reference
                value within the tolerance detected from the reference's
                written precision.

        Raises:
            ValueError: If the solution value cannot be evaluated.
        """
        rel_tol = self._detect_tolerance(self.tolerance, self.gt_value)
        self.gt_value = float(self.gt_value)

        if self._is_number(self.sol_value):
            self.sol_value = float(self.sol_value)
        else:
            logger.info(
                f'Convert output expr {self.sol_value} into numerical.'
            )
            try:
                sol_expr = self._parse_expression(self.sol_value)
                self.sol_value = sol_expr.evalf()
            except Exception as e:
                raise ValueError(
                    f"Failed to evaluate output {self.sol_value}: {e}"
                ) from e

        if (
            self.gt_unit_expr is not None
            and self.sol_unit_expr is not None
            and not self.verify_unit(self.sol_unit_expr, self.gt_unit_expr)
        ):
            logger.info(
                'Units do not match directly. Attempting conversion...'
            )
            self._convert_units()

        # Compare numerical values
        logger.info(f'Solution value: {self.sol_value}')
        logger.info(f'Ground truth value: {self.gt_value}')

        return math.isclose(self.sol_value, self.gt_value, rel_tol=rel_tol)

    def _compare_symbolic_values(self) -> bool:
        r"""Compare symbolic expressions for equivalence.

        Both values are parsed and simplified. If both evaluate to floats
        they are compared with ``self.tolerance``; otherwise they are equal
        when structurally identical or when their difference simplifies to
        zero.

        Returns:
            bool: True if the expressions are considered equivalent.

        Raises:
            ValueError: If a value cannot be parsed into a SymPy object, or
                if exactly one of the two is an equation.
        """

        import sympy as sp

        gt_expr = self._parse_expression(self.gt_value)
        sol_expr = self._parse_expression(self.sol_value)

        logger.info(f'Solution expression: {sol_expr}')
        logger.info(f'Ground truth expression: {gt_expr}')

        # ``_parse_expression`` hands back its input when parsing fails;
        # report that clearly instead of failing on a missing attribute.
        for label, parsed in (
            ('solution', sol_expr),
            ('reference answer', gt_expr),
        ):
            if not isinstance(parsed, sp.Basic):
                raise ValueError(
                    f"Could not parse {label} as a symbolic "
                    f"expression: {parsed!r}"
                )

        sol_symbols = sol_expr.free_symbols
        gt_symbols = gt_expr.free_symbols

        if sol_symbols != gt_symbols:
            # Map reference symbols onto solution symbols of the same name.
            sol_by_name = {}
            for sol_sym in sol_symbols:
                sol_by_name.setdefault(str(sol_sym), sol_sym)
            symbol_mapping = {
                gt_sym: sol_by_name[str(gt_sym)]
                for gt_sym in gt_symbols
                if str(gt_sym) in sol_by_name
            }
            gt_expr = gt_expr.subs(symbol_mapping)

        # Handle Equalities
        sol_is_eq = isinstance(sol_expr, sp.Eq)
        gt_is_eq = isinstance(gt_expr, sp.Eq)
        if sol_is_eq != gt_is_eq:
            raise ValueError(
                f"Cannot compare an equation with a non-equation "
                f"directly: {sol_expr}, {gt_expr}"
            )
        if sol_is_eq:
            # Only the right-hand sides are compared.
            sol_expr = sp.simplify(sol_expr.rhs)
            gt_expr = sp.simplify(gt_expr.rhs)
        else:
            sol_expr = sp.simplify(sol_expr)
            gt_expr = sp.simplify(gt_expr)

        try:
            sol_number = float(
                sol_expr
                if isinstance(sol_expr, (int, float, sp.Number))
                else sol_expr.evalf()
            )
            gt_number = float(
                gt_expr
                if isinstance(gt_expr, (int, float, sp.Number))
                else gt_expr.evalf()
            )
        except Exception:
            # Not purely numeric (SymPy raises several exception types
            # here); fall back to a symbolic comparison.
            if sol_expr == gt_expr:
                return True
            try:
                return bool(sp.simplify(sol_expr - gt_expr) == 0)
            except Exception:
                return False

        self.sol_value = sol_number
        self.gt_value = gt_number
        return math.isclose(sol_number, gt_number, rel_tol=self.tolerance)
782
+
783
+
784
class PhysicsVerifier(PythonVerifier):
    r"""The PhysicsVerifier inherits PythonVerifier and makes it able to
    compare and convert units.

    Args:
        extractor (Optional[BaseExtractor]): The extractor to use for
            extracting code from messages. (default: :obj:`None`)
        timeout (Optional[float]): The timeout for code execution in seconds.
            (default: :obj:`30.0`)
        required_packages (Optional[List[str]]): The required packages for code
            execution. (default: :obj:`None`)
        float_tolerance (Optional[float]): The relative tolerance used to
            compare numerical values. (default: :obj:`None`)
        **kwargs: Additional keyword arguments to pass to the parent class.
    """

    def __init__(
        self,
        extractor: Optional[BaseExtractor] = None,
        timeout: Optional[float] = 30.0,
        required_packages: Optional[List[str]] = None,
        float_tolerance: Optional[float] = None,
        **kwargs,
    ) -> None:
        super().__init__(
            extractor=extractor,
            timeout=timeout,
            required_packages=required_packages,
            **kwargs,
        )
        # Forwarded unchanged to PhysicsSolutionComparator for each check.
        self.tolerance = float_tolerance

    async def _verify_implementation(
        self, solution: str, reference_answer: Optional[str]
    ) -> VerificationResult:
        r"""Run the solution code and compare its output with the
        reference answer, taking units into account.

        Args:
            solution (str): The code block to execute; it should already
                include a ``print(...)`` of the final answer at the end.
            reference_answer (Optional[str]): The expected answer. Required
                for physics verification.

        Returns:
            VerificationResult: The comparison result. ``ERROR`` is returned
                when the virtual environment or its Python binary is
                missing, when no reference answer is given, when the code
                exits with a non-zero status or on any unexpected
                exception; ``TIMEOUT`` when execution times out.
        """
        # Check for virtual environment setup
        if not self.venv_path:
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message="Virtual environment is not set up.",
            )

        venv_python = os.path.join(
            self.venv_path,
            self.bin_dir,
            "python.exe" if os.name == 'nt' else "python",
        )
        if not os.path.exists(venv_python):
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message="Python binary not found in virtual environment",
            )

        # Without a reference there is nothing to compare against, so fail
        # before spending a process on executing the solution.
        if reference_answer is None:
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message=(
                    "Reference answer is required for physics "
                    "verification."
                ),
            )

        try:
            # run the code block,
            # which should already include a print(...) in the end
            sol_out, sol_err, sol_code = await self._run_code_block(
                solution, venv_python
            )
            if sol_code != 0:
                return VerificationResult(
                    status=VerificationOutcome.ERROR,
                    result=sol_out,
                    error_message=f"Solution code error:\n{sol_err}",
                )

            logger.info(f"Solution: {sol_out}")

            comparator = PhysicsSolutionComparator(
                sol_out, reference_answer, self.tolerance
            )

            return comparator.compare_solution_to_reference()

        except asyncio.TimeoutError:
            return VerificationResult(
                status=VerificationOutcome.TIMEOUT,
                result="",
                error_message="Execution timed out.",
            )
        except Exception as e:
            return VerificationResult(
                status=VerificationOutcome.ERROR,
                result="",
                error_message=f"Unexpected error: {e}",
            )