lionagi 0.14.9__py3-none-any.whl → 0.14.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lionagi/utils.py CHANGED
@@ -14,9 +14,7 @@ import re
14
14
  import shutil
15
15
  import subprocess
16
16
  import sys
17
- import time as t_
18
17
  import uuid
19
- import xml.etree.ElementTree as ET
20
18
  from abc import ABC
21
19
  from collections.abc import (
22
20
  AsyncGenerator,
@@ -27,7 +25,6 @@ from collections.abc import (
27
25
  )
28
26
  from concurrent.futures import ThreadPoolExecutor
29
27
  from datetime import datetime, timezone
30
- from decimal import Decimal
31
28
  from enum import Enum
32
29
  from functools import lru_cache, partial
33
30
  from inspect import isclass
@@ -43,11 +40,12 @@ from typing import (
43
40
  )
44
41
 
45
42
  import anyio
46
- from pydantic import BaseModel, model_validator
43
+ from pydantic import BaseModel
47
44
  from pydantic_core import PydanticUndefinedType
48
45
 
49
46
  from .libs.concurrency import Lock as ConcurrencyLock
50
47
  from .libs.concurrency import Semaphore, create_task_group
48
+ from .libs.validate.xml_parser import xml_to_dict
51
49
  from .settings import Settings
52
50
 
53
51
  R = TypeVar("R")
@@ -89,7 +87,6 @@ __all__ = (
89
87
  "throttle",
90
88
  "max_concurrent",
91
89
  "force_async",
92
- "to_num",
93
90
  "breakdown_pydantic_annotation",
94
91
  "run_package_manager_command",
95
92
  "StringEnum",
@@ -473,24 +470,6 @@ def to_list(
473
470
  return processed
474
471
 
475
472
 
476
- class ToListParams(Params):
477
- flatten: bool = False
478
- dropna: bool = False
479
- unique: bool = False
480
- use_values: bool = False
481
- flatten_tuple_set: bool = False
482
-
483
- def __call__(self, input_: Any):
484
- return to_list(
485
- input_,
486
- flatten=self.flatten,
487
- dropna=self.dropna,
488
- unique=self.unique,
489
- use_values=self.use_values,
490
- flatten_tuple_set=self.flatten_tuple_set,
491
- )
492
-
493
-
494
473
  def lcall(
495
474
  input_: Iterable[T] | T,
496
475
  func: Callable[[T], R] | Iterable[Callable[[T], R]],
@@ -598,29 +577,6 @@ def lcall(
598
577
  return out
599
578
 
600
579
 
601
- class CallParams(Params):
602
- """params class for high order function with additional handling of lower order function parameters, can take arbitrary number of args and kwargs, args need to be in agrs=, kwargs can be passed as is"""
603
-
604
- args: list = []
605
- kwargs: dict = {}
606
-
607
- @model_validator(mode="before")
608
- def _validate_data(cls, data: dict):
609
- _d = {}
610
- for k in list(data.keys()):
611
- if k in cls.keys():
612
- _d[k] = data.pop(k)
613
- _d.setdefault("args", [])
614
- _d.setdefault("kwargs", {})
615
- _d["kwargs"].update(data)
616
- return _d
617
-
618
- def __call__(self, *args, **kwargs):
619
- raise NotImplementedError(
620
- "This method should be implemented in a subclass"
621
- )
622
-
623
-
624
580
  async def alcall(
625
581
  input_: list[Any],
626
582
  func: Callable[..., T],
@@ -933,92 +889,6 @@ def create_path(
933
889
  return full_path
934
890
 
935
891
 
936
- class CreatePathParams(Params):
937
- directory: Path | str
938
- filename: str
939
- extension: str = None
940
- timestamp: bool = False
941
- dir_exist_ok: bool = True
942
- file_exist_ok: bool = False
943
- time_prefix: bool = False
944
- timestamp_format: str | None = None
945
- random_hash_digits: int = 0
946
-
947
- def __call__(
948
- self, directory: Path | str = None, filename: str = None
949
- ) -> Path:
950
- return create_path(
951
- directory or self.directory,
952
- filename or self.filename,
953
- extension=self.extension,
954
- timestamp=self.timestamp,
955
- dir_exist_ok=self.dir_exist_ok,
956
- file_exist_ok=self.file_exist_ok,
957
- time_prefix=self.time_prefix,
958
- timestamp_format=self.timestamp_format,
959
- random_hash_digits=self.random_hash_digits,
960
- )
961
-
962
-
963
- # --- JSON and XML Conversion ---
964
-
965
-
966
- def to_xml(
967
- obj: dict | list | str | int | float | bool | None,
968
- root_name: str = "root",
969
- ) -> str:
970
- """
971
- Convert a dictionary into an XML formatted string.
972
-
973
- Rules:
974
- - A dictionary key becomes an XML tag.
975
- - If the dictionary value is:
976
- - A primitive type (str, int, float, bool, None): it becomes the text content of the tag.
977
- - A list: each element of the list will repeat the same tag.
978
- - Another dictionary: it is recursively converted to nested XML.
979
- - root_name sets the top-level XML element name.
980
-
981
- Args:
982
- obj: The Python object to convert (typically a dictionary).
983
- root_name: The name of the root XML element.
984
-
985
- Returns:
986
- A string representing the XML.
987
-
988
- Examples:
989
- >>> to_xml({"a": 1, "b": {"c": "hello", "d": [10, 20]}}, root_name="data")
990
- '<data><a>1</a><b><c>hello</c><d>10</d><d>20</d></b></data>'
991
- """
992
-
993
- def _convert(value: Any, tag_name: str) -> str:
994
- # If value is a dict, recursively convert its keys
995
- if isinstance(value, dict):
996
- inner = "".join(_convert(v, k) for k, v in value.items())
997
- return f"<{tag_name}>{inner}</{tag_name}>"
998
- # If value is a list, repeat the same tag for each element
999
- elif isinstance(value, list):
1000
- return "".join(_convert(item, tag_name) for item in value)
1001
- # If value is a primitive, convert to string and place inside tag
1002
- else:
1003
- text = "" if value is None else str(value)
1004
- # Escape special XML characters if needed (minimal)
1005
- text = (
1006
- text.replace("&", "&amp;")
1007
- .replace("<", "&lt;")
1008
- .replace(">", "&gt;")
1009
- .replace('"', "&quot;")
1010
- .replace("'", "&apos;")
1011
- )
1012
- return f"<{tag_name}>{text}</{tag_name}>"
1013
-
1014
- # If top-level obj is not a dict, wrap it in one
1015
- if not isinstance(obj, dict):
1016
- obj = {root_name: obj}
1017
-
1018
- inner_xml = "".join(_convert(v, k) for k, v in obj.items())
1019
- return f"<{root_name}>{inner_xml}</{root_name}>"
1020
-
1021
-
1022
892
  def fuzzy_parse_json(
1023
893
  str_to_parse: str, /
1024
894
  ) -> dict[str, Any] | list[dict[str, Any]]:
@@ -1132,148 +1002,6 @@ def fix_json_string(str_to_parse: str, /) -> str:
1132
1002
  return str_to_parse
1133
1003
 
1134
1004
 
1135
- class XMLParser:
1136
- def __init__(self, xml_string: str):
1137
- self.xml_string = xml_string.strip()
1138
- self.index = 0
1139
-
1140
- def parse(self) -> dict[str, Any]:
1141
- """Parse the XML string and return the root element as a dictionary."""
1142
- return self._parse_element()
1143
-
1144
- def _parse_element(self) -> dict[str, Any]:
1145
- """Parse a single XML element and its children."""
1146
- self._skip_whitespace()
1147
- if self.xml_string[self.index] != "<":
1148
- raise ValueError(
1149
- f"Expected '<', found '{self.xml_string[self.index]}'"
1150
- )
1151
-
1152
- tag, attributes = self._parse_opening_tag()
1153
- children: dict[str, str | list | dict] = {}
1154
- text = ""
1155
-
1156
- while self.index < len(self.xml_string):
1157
- self._skip_whitespace()
1158
- if self.xml_string.startswith("</", self.index):
1159
- closing_tag = self._parse_closing_tag()
1160
- if closing_tag != tag:
1161
- raise ValueError(
1162
- f"Mismatched tags: '{tag}' and '{closing_tag}'"
1163
- )
1164
- break
1165
- elif self.xml_string.startswith("<", self.index):
1166
- child = self._parse_element()
1167
- child_tag, child_data = next(iter(child.items()))
1168
- if child_tag in children:
1169
- if not isinstance(children[child_tag], list):
1170
- children[child_tag] = [children[child_tag]]
1171
- children[child_tag].append(child_data)
1172
- else:
1173
- children[child_tag] = child_data
1174
- else:
1175
- text += self._parse_text()
1176
-
1177
- result: dict[str, Any] = {}
1178
- if attributes:
1179
- result["@attributes"] = attributes
1180
- if children:
1181
- result.update(children)
1182
- elif text.strip():
1183
- result = text.strip()
1184
-
1185
- return {tag: result}
1186
-
1187
- def _parse_opening_tag(self) -> tuple[str, dict[str, str]]:
1188
- """Parse an opening XML tag and its attributes."""
1189
- match = re.match(
1190
- r'<(\w+)((?:\s+\w+="[^"]*")*)\s*/?>',
1191
- self.xml_string[self.index :], # noqa
1192
- )
1193
- if not match:
1194
- raise ValueError("Invalid opening tag")
1195
- self.index += match.end()
1196
- tag = match.group(1)
1197
- attributes = dict(re.findall(r'(\w+)="([^"]*)"', match.group(2)))
1198
- return tag, attributes
1199
-
1200
- def _parse_closing_tag(self) -> str:
1201
- """Parse a closing XML tag."""
1202
- match = re.match(r"</(\w+)>", self.xml_string[self.index :]) # noqa
1203
- if not match:
1204
- raise ValueError("Invalid closing tag")
1205
- self.index += match.end()
1206
- return match.group(1)
1207
-
1208
- def _parse_text(self) -> str:
1209
- """Parse text content between XML tags."""
1210
- start = self.index
1211
- while (
1212
- self.index < len(self.xml_string)
1213
- and self.xml_string[self.index] != "<"
1214
- ):
1215
- self.index += 1
1216
- return self.xml_string[start : self.index] # noqa
1217
-
1218
- def _skip_whitespace(self) -> None:
1219
- """Skip any whitespace characters at the current parsing position."""
1220
- p_ = len(self.xml_string[self.index :]) # noqa
1221
- m_ = len(self.xml_string[self.index :].lstrip()) # noqa
1222
-
1223
- self.index += p_ - m_
1224
-
1225
-
1226
- def xml_to_dict(
1227
- xml_string: str,
1228
- /,
1229
- suppress=False,
1230
- remove_root: bool = True,
1231
- root_tag: str = None,
1232
- ) -> dict[str, Any]:
1233
- """
1234
- Parse an XML string into a nested dictionary structure.
1235
-
1236
- This function converts an XML string into a dictionary where:
1237
- - Element tags become dictionary keys
1238
- - Text content is assigned directly to the tag key if there are no children
1239
- - Attributes are stored in a '@attributes' key
1240
- - Multiple child elements with the same tag are stored as lists
1241
-
1242
- Args:
1243
- xml_string: The XML string to parse.
1244
-
1245
- Returns:
1246
- A dictionary representation of the XML structure.
1247
-
1248
- Raises:
1249
- ValueError: If the XML is malformed or parsing fails.
1250
- """
1251
- try:
1252
- a = XMLParser(xml_string).parse()
1253
- if remove_root and (root_tag or "root") in a:
1254
- a = a[root_tag or "root"]
1255
- return a
1256
- except ValueError as e:
1257
- if not suppress:
1258
- raise e
1259
-
1260
-
1261
- def dict_to_xml(data: dict, /, root_tag: str = "root") -> str:
1262
- root = ET.Element(root_tag)
1263
-
1264
- def convert(dict_obj: dict, parent: Any) -> None:
1265
- for key, val in dict_obj.items():
1266
- if isinstance(val, dict):
1267
- element = ET.SubElement(parent, key)
1268
- convert(dict_obj=val, parent=element)
1269
- else:
1270
- element = ET.SubElement(parent, key)
1271
- element.text = str(object=val)
1272
-
1273
- convert(dict_obj=data, parent=root)
1274
- return ET.tostring(root, encoding="unicode")
1275
-
1276
-
1277
1005
  def to_dict(
1278
1006
  input_: Any,
1279
1007
  /,
@@ -1677,74 +1405,6 @@ def get_bins(input_: list[str], upper: int) -> list[list[int]]:
1677
1405
  return bins
1678
1406
 
1679
1407
 
1680
- class Throttle:
1681
- """
1682
- Provide a throttling mechanism for function calls.
1683
-
1684
- When used as a decorator, it ensures that the decorated function can only
1685
- be called once per specified period. Subsequent calls within this period
1686
- are delayed to enforce this constraint.
1687
-
1688
- Attributes:
1689
- period: The minimum time period (in seconds) between successive calls.
1690
- """
1691
-
1692
- def __init__(self, period: float) -> None:
1693
- """
1694
- Initialize a new instance of Throttle.
1695
-
1696
- Args:
1697
- period: The minimum time period (in seconds) between
1698
- successive calls.
1699
- """
1700
- self.period = period
1701
- self.last_called = 0
1702
-
1703
- def __call__(self, func: Callable[..., T]) -> Callable[..., T]:
1704
- """
1705
- Decorate a synchronous function with the throttling mechanism.
1706
-
1707
- Args:
1708
- func: The synchronous function to be throttled.
1709
-
1710
- Returns:
1711
- The throttled synchronous function.
1712
- """
1713
-
1714
- @functools.wraps(func)
1715
- def wrapper(*args, **kwargs) -> Any:
1716
- elapsed = time() - self.last_called
1717
- if elapsed < self.period:
1718
- t_.sleep(self.period - elapsed)
1719
- self.last_called = time()
1720
- return func(*args, **kwargs)
1721
-
1722
- return wrapper
1723
-
1724
- def __call_async__(
1725
- self, func: Callable[..., Callable[..., T]]
1726
- ) -> Callable[..., Callable[..., T]]:
1727
- """
1728
- Decorate an asynchronous function with the throttling mechanism.
1729
-
1730
- Args:
1731
- func: The asynchronous function to be throttled.
1732
-
1733
- Returns:
1734
- The throttled asynchronous function.
1735
- """
1736
-
1737
- @functools.wraps(func)
1738
- async def wrapper(*args, **kwargs) -> Any:
1739
- elapsed = time() - self.last_called
1740
- if elapsed < self.period:
1741
- await asyncio.sleep(self.period - elapsed)
1742
- self.last_called = time()
1743
- return await func(*args, **kwargs)
1744
-
1745
- return wrapper
1746
-
1747
-
1748
1408
  def force_async(fn: Callable[..., T]) -> Callable[..., Callable[..., T]]:
1749
1409
  """
1750
1410
  Convert a synchronous function to an asynchronous function
@@ -1779,6 +1439,8 @@ def throttle(
1779
1439
  Returns:
1780
1440
  The throttled function.
1781
1441
  """
1442
+ from lionagi.libs.concurrency.throttle import Throttle
1443
+
1782
1444
  if not is_coro_func(func):
1783
1445
  func = force_async(func)
1784
1446
  throttle_instance = Throttle(period)
@@ -1816,380 +1478,6 @@ def max_concurrent(
1816
1478
  return wrapper
1817
1479
 
1818
1480
 
1819
- # Type definitions
1820
- NUM_TYPE_LITERAL = Literal["int", "float", "complex"]
1821
- NUM_TYPES = type[int] | type[float] | type[complex] | NUM_TYPE_LITERAL
1822
- NumericType = TypeVar("NumericType", int, float, complex)
1823
-
1824
- # Type mapping
1825
- TYPE_MAP = {"int": int, "float": float, "complex": complex}
1826
-
1827
- # Regex patterns for different numeric formats
1828
- PATTERNS = {
1829
- "scientific": r"[-+]?(?:\d*\.)?\d+[eE][-+]?\d+",
1830
- "complex_sci": r"[-+]?(?:\d*\.)?\d+(?:[eE][-+]?\d+)?[-+](?:\d*\.)?\d+(?:[eE][-+]?\d+)?[jJ]",
1831
- "complex": r"[-+]?(?:\d*\.)?\d+[-+](?:\d*\.)?\d+[jJ]",
1832
- "pure_imaginary": r"[-+]?(?:\d*\.)?\d*[jJ]",
1833
- "percentage": r"[-+]?(?:\d*\.)?\d+%",
1834
- "fraction": r"[-+]?\d+/\d+",
1835
- "decimal": r"[-+]?(?:\d*\.)?\d+",
1836
- "special": r"[-+]?(?:inf|infinity|nan)",
1837
- }
1838
-
1839
-
1840
- def to_num(
1841
- input_: Any,
1842
- /,
1843
- *,
1844
- upper_bound: int | float | None = None,
1845
- lower_bound: int | float | None = None,
1846
- num_type: NUM_TYPES = float,
1847
- precision: int | None = None,
1848
- num_count: int = 1,
1849
- ) -> int | float | complex | list[int | float | complex]:
1850
- """Convert input to numeric type(s) with validation and bounds checking.
1851
-
1852
- Args:
1853
- input_value: The input to convert to number(s).
1854
- upper_bound: Maximum allowed value (inclusive).
1855
- lower_bound: Minimum allowed value (inclusive).
1856
- num_type: Target numeric type ('int', 'float', 'complex' or type objects).
1857
- precision: Number of decimal places for rounding (float only).
1858
- num_count: Number of numeric values to extract.
1859
-
1860
- Returns:
1861
- Converted number(s). Single value if num_count=1, else list.
1862
-
1863
- Raises:
1864
- ValueError: For invalid input or out of bounds values.
1865
- TypeError: For invalid input types or invalid type conversions.
1866
- """
1867
- # Validate input
1868
- if isinstance(input_, (list, tuple)):
1869
- raise TypeError("Input cannot be a sequence")
1870
-
1871
- # Handle boolean input
1872
- if isinstance(input_, bool):
1873
- return validate_num_type(num_type)(input_)
1874
-
1875
- # Handle direct numeric input
1876
- if isinstance(input_, (int, float, complex, Decimal)):
1877
- inferred_type = type(input_)
1878
- if isinstance(input_, Decimal):
1879
- inferred_type = float
1880
- value = float(input_) if not isinstance(input_, complex) else input_
1881
- value = apply_bounds(value, upper_bound, lower_bound)
1882
- value = apply_precision(value, precision)
1883
- return convert_type(value, validate_num_type(num_type), inferred_type)
1884
-
1885
- # Convert input to string and extract numbers
1886
- input_str = str(input_)
1887
- number_matches = extract_numbers(input_str)
1888
-
1889
- if not number_matches:
1890
- raise ValueError(f"No valid numbers found in: {input_str}")
1891
-
1892
- # Process numbers
1893
- results = []
1894
- target_type = validate_num_type(num_type)
1895
-
1896
- number_matches = (
1897
- number_matches[:num_count]
1898
- if num_count < len(number_matches)
1899
- else number_matches
1900
- )
1901
-
1902
- for type_and_value in number_matches:
1903
- try:
1904
- # Infer appropriate type
1905
- inferred_type = infer_type(type_and_value)
1906
-
1907
- # Parse to numeric value
1908
- value = parse_number(type_and_value)
1909
-
1910
- # Apply bounds if not complex
1911
- value = apply_bounds(value, upper_bound, lower_bound)
1912
-
1913
- # Apply precision
1914
- value = apply_precision(value, precision)
1915
-
1916
- # Convert to target type if different from inferred
1917
- value = convert_type(value, target_type, inferred_type)
1918
-
1919
- results.append(value)
1920
-
1921
- except Exception as e:
1922
- if len(type_and_value) == 2:
1923
- raise type(e)(
1924
- f"Error processing {type_and_value[1]}: {str(e)}"
1925
- )
1926
- raise type(e)(f"Error processing {type_and_value}: {str(e)}")
1927
-
1928
- if results and num_count == 1:
1929
- return results[0]
1930
- return results
1931
-
1932
-
1933
- def extract_numbers(text: str) -> list[tuple[str, str]]:
1934
- """Extract numeric values from text using ordered regex patterns.
1935
-
1936
- Args:
1937
- text: The text to extract numbers from.
1938
-
1939
- Returns:
1940
- List of tuples containing (pattern_type, matched_value).
1941
- """
1942
- combined_pattern = "|".join(PATTERNS.values())
1943
- matches = re.finditer(combined_pattern, text, re.IGNORECASE)
1944
- numbers = []
1945
-
1946
- for match in matches:
1947
- value = match.group()
1948
- # Check which pattern matched
1949
- for pattern_name, pattern in PATTERNS.items():
1950
- if re.fullmatch(pattern, value, re.IGNORECASE):
1951
- numbers.append((pattern_name, value))
1952
- break
1953
-
1954
- return numbers
1955
-
1956
-
1957
- def validate_num_type(num_type: NUM_TYPES) -> type:
1958
- """Validate and normalize numeric type specification.
1959
-
1960
- Args:
1961
- num_type: The numeric type to validate.
1962
-
1963
- Returns:
1964
- The normalized Python type object.
1965
-
1966
- Raises:
1967
- ValueError: If the type specification is invalid.
1968
- """
1969
- if isinstance(num_type, str):
1970
- if num_type not in TYPE_MAP:
1971
- raise ValueError(f"Invalid number type: {num_type}")
1972
- return TYPE_MAP[num_type]
1973
-
1974
- if num_type not in (int, float, complex):
1975
- raise ValueError(f"Invalid number type: {num_type}")
1976
- return num_type
1977
-
1978
-
1979
- def infer_type(value: tuple[str, str]) -> type:
1980
- """Infer appropriate numeric type from value.
1981
-
1982
- Args:
1983
- value: Tuple of (pattern_type, matched_value).
1984
-
1985
- Returns:
1986
- The inferred Python type.
1987
- """
1988
- pattern_type, _ = value
1989
- if pattern_type in ("complex", "complex_sci", "pure_imaginary"):
1990
- return complex
1991
- return float
1992
-
1993
-
1994
- def convert_special(value: str) -> float:
1995
- """Convert special float values (inf, -inf, nan).
1996
-
1997
- Args:
1998
- value: The string value to convert.
1999
-
2000
- Returns:
2001
- The converted float value.
2002
- """
2003
- value = value.lower()
2004
- if "infinity" in value or "inf" in value:
2005
- return float("-inf") if value.startswith("-") else float("inf")
2006
- return float("nan")
2007
-
2008
-
2009
- def convert_percentage(value: str) -> float:
2010
- """Convert percentage string to float.
2011
-
2012
- Args:
2013
- value: The percentage string to convert.
2014
-
2015
- Returns:
2016
- The converted float value.
2017
-
2018
- Raises:
2019
- ValueError: If the percentage value is invalid.
2020
- """
2021
- try:
2022
- return float(value.rstrip("%")) / 100
2023
- except ValueError as e:
2024
- raise ValueError(f"Invalid percentage value: {value}") from e
2025
-
2026
-
2027
- def convert_complex(value: str) -> complex:
2028
- """Convert complex number string to complex.
2029
-
2030
- Args:
2031
- value: The complex number string to convert.
2032
-
2033
- Returns:
2034
- The converted complex value.
2035
-
2036
- Raises:
2037
- ValueError: If the complex number is invalid.
2038
- """
2039
- try:
2040
- # Handle pure imaginary numbers
2041
- if value.endswith("j") or value.endswith("J"):
2042
- if value in ("j", "J"):
2043
- return complex(0, 1)
2044
- if value in ("+j", "+J"):
2045
- return complex(0, 1)
2046
- if value in ("-j", "-J"):
2047
- return complex(0, -1)
2048
- if "+" not in value and "-" not in value[1:]:
2049
- # Pure imaginary number
2050
- imag = float(value[:-1] or "1")
2051
- return complex(0, imag)
2052
-
2053
- return complex(value.replace(" ", ""))
2054
- except ValueError as e:
2055
- raise ValueError(f"Invalid complex number: {value}") from e
2056
-
2057
-
2058
- def convert_type(
2059
- value: float | complex,
2060
- target_type: type,
2061
- inferred_type: type,
2062
- ) -> int | float | complex:
2063
- """Convert value to target type if specified, otherwise use inferred type.
2064
-
2065
- Args:
2066
- value: The value to convert.
2067
- target_type: The requested target type.
2068
- inferred_type: The inferred type from the value.
2069
-
2070
- Returns:
2071
- The converted value.
2072
-
2073
- Raises:
2074
- TypeError: If the conversion is not possible.
2075
- """
2076
- try:
2077
- # If no specific type requested, use inferred type
2078
- if target_type is float and inferred_type is complex:
2079
- return value
2080
-
2081
- # Handle explicit type conversions
2082
- if target_type is int and isinstance(value, complex):
2083
- raise TypeError("Cannot convert complex number to int")
2084
- return target_type(value)
2085
- except (ValueError, TypeError) as e:
2086
- raise TypeError(
2087
- f"Cannot convert {value} to {target_type.__name__}"
2088
- ) from e
2089
-
2090
-
2091
- def apply_bounds(
2092
- value: float | complex,
2093
- upper_bound: float | None = None,
2094
- lower_bound: float | None = None,
2095
- ) -> float | complex:
2096
- """Apply bounds checking to numeric value.
2097
-
2098
- Args:
2099
- value: The value to check.
2100
- upper_bound: Maximum allowed value (inclusive).
2101
- lower_bound: Minimum allowed value (inclusive).
2102
-
2103
- Returns:
2104
- The validated value.
2105
-
2106
- Raises:
2107
- ValueError: If the value is outside bounds.
2108
- """
2109
- if isinstance(value, complex):
2110
- return value
2111
-
2112
- if upper_bound is not None and value > upper_bound:
2113
- raise ValueError(f"Value {value} exceeds upper bound {upper_bound}")
2114
- if lower_bound is not None and value < lower_bound:
2115
- raise ValueError(f"Value {value} below lower bound {lower_bound}")
2116
- return value
2117
-
2118
-
2119
- def apply_precision(
2120
- value: float | complex,
2121
- precision: int | None,
2122
- ) -> float | complex:
2123
- """Apply precision rounding to numeric value.
2124
-
2125
- Args:
2126
- value: The value to round.
2127
- precision: Number of decimal places.
2128
-
2129
- Returns:
2130
- The rounded value.
2131
- """
2132
- if precision is None or isinstance(value, complex):
2133
- return value
2134
- if isinstance(value, float):
2135
- return round(value, precision)
2136
- return value
2137
-
2138
-
2139
- def parse_number(type_and_value: tuple[str, str]) -> float | complex:
2140
- """Parse string to numeric value based on pattern type.
2141
-
2142
- Args:
2143
- type_and_value: Tuple of (pattern_type, matched_value).
2144
-
2145
- Returns:
2146
- The parsed numeric value.
2147
-
2148
- Raises:
2149
- ValueError: If parsing fails.
2150
- """
2151
- num_type, value = type_and_value
2152
- value = value.strip()
2153
-
2154
- try:
2155
- if num_type == "special":
2156
- return convert_special(value)
2157
-
2158
- if num_type == "percentage":
2159
- return convert_percentage(value)
2160
-
2161
- if num_type == "fraction":
2162
- if "/" not in value:
2163
- raise ValueError(f"Invalid fraction: {value}")
2164
- if value.count("/") > 1:
2165
- raise ValueError(f"Invalid fraction: {value}")
2166
- num, denom = value.split("/")
2167
- if not (num.strip("-").isdigit() and denom.isdigit()):
2168
- raise ValueError(f"Invalid fraction: {value}")
2169
- denom_val = float(denom)
2170
- if denom_val == 0:
2171
- raise ValueError("Division by zero")
2172
- return float(num) / denom_val
2173
- if num_type in ("complex", "complex_sci", "pure_imaginary"):
2174
- return convert_complex(value)
2175
- if num_type == "scientific":
2176
- if "e" not in value.lower():
2177
- raise ValueError(f"Invalid scientific notation: {value}")
2178
- parts = value.lower().split("e")
2179
- if len(parts) != 2:
2180
- raise ValueError(f"Invalid scientific notation: {value}")
2181
- if not (parts[1].lstrip("+-").isdigit()):
2182
- raise ValueError(f"Invalid scientific notation: {value}")
2183
- return float(value)
2184
- if num_type == "decimal":
2185
- return float(value)
2186
-
2187
- raise ValueError(f"Unknown number type: {num_type}")
2188
- except Exception as e:
2189
- # Preserve the specific error type but wrap with more context
2190
- raise type(e)(f"Failed to parse {value} as {num_type}: {str(e)}")
2191
-
2192
-
2193
1481
  def breakdown_pydantic_annotation(
2194
1482
  model: type[B], max_depth: int | None = None, current_depth: int = 0
2195
1483
  ) -> dict[str, Any]:
@@ -2409,60 +1697,3 @@ def is_import_installed(package_name: str) -> bool:
2409
1697
  bool: True if the package is installed, False otherwise.
2410
1698
  """
2411
1699
  return importlib.util.find_spec(package_name) is not None
2412
-
2413
-
2414
- def read_image_to_base64(image_path: str | Path) -> str:
2415
- import base64
2416
-
2417
- import cv2
2418
-
2419
- image_path = str(image_path)
2420
- image = cv2.imread(image_path, cv2.COLOR_BGR2RGB)
2421
-
2422
- if image is None:
2423
- raise ValueError(f"Could not read image from path: {image_path}")
2424
-
2425
- file_extension = "." + image_path.split(".")[-1]
2426
-
2427
- success, buffer = cv2.imencode(file_extension, image)
2428
- if not success:
2429
- raise ValueError(f"Could not encode image to {file_extension} format.")
2430
- encoded_image = base64.b64encode(buffer).decode("utf-8")
2431
- return encoded_image
2432
-
2433
-
2434
- def pdf_to_images(
2435
- pdf_path: str, output_folder: str, dpi: int = 300, fmt: str = "jpeg"
2436
- ) -> list:
2437
- """
2438
- Convert a PDF file into images, one image per page.
2439
-
2440
- Args:
2441
- pdf_path (str): Path to the input PDF file.
2442
- output_folder (str): Directory to save the output images.
2443
- dpi (int): Dots per inch (resolution) for conversion (default: 300).
2444
- fmt (str): Image format (default: 'jpeg'). Use 'png' if preferred.
2445
-
2446
- Returns:
2447
- list: A list of file paths for the saved images.
2448
- """
2449
- import os
2450
-
2451
- convert_from_path = check_import(
2452
- "pdf2image", import_name="convert_from_path"
2453
- )
2454
-
2455
- # Ensure the output folder exists
2456
- os.makedirs(output_folder, exist_ok=True)
2457
-
2458
- # Convert PDF to a list of PIL Image objects
2459
- images = convert_from_path(pdf_path, dpi=dpi)
2460
-
2461
- saved_paths = []
2462
- for i, image in enumerate(images):
2463
- # Construct the output file name
2464
- image_file = os.path.join(output_folder, f"page_{i + 1}.{fmt}")
2465
- image.save(image_file, fmt.upper())
2466
- saved_paths.append(image_file)
2467
-
2468
- return saved_paths