lionagi 0.14.9__py3-none-any.whl → 0.14.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/fields/reason.py +1 -1
- lionagi/libs/concurrency/throttle.py +79 -0
- lionagi/libs/parse.py +2 -1
- lionagi/libs/unstructured/__init__.py +0 -0
- lionagi/libs/unstructured/pdf_to_image.py +45 -0
- lionagi/libs/unstructured/read_image_to_base64.py +33 -0
- lionagi/libs/validate/to_num.py +378 -0
- lionagi/libs/validate/xml_parser.py +203 -0
- lionagi/service/hooks/_utils.py +2 -2
- lionagi/service/hooks/hook_registry.py +8 -8
- lionagi/tools/file/reader.py +1 -1
- lionagi/tools/memory/tools.py +2 -2
- lionagi/utils.py +4 -773
- lionagi/version.py +1 -1
- {lionagi-0.14.9.dist-info → lionagi-0.14.10.dist-info}/METADATA +5 -1
- {lionagi-0.14.9.dist-info → lionagi-0.14.10.dist-info}/RECORD +18 -12
- {lionagi-0.14.9.dist-info → lionagi-0.14.10.dist-info}/WHEEL +0 -0
- {lionagi-0.14.9.dist-info → lionagi-0.14.10.dist-info}/licenses/LICENSE +0 -0
lionagi/utils.py
CHANGED
@@ -14,9 +14,7 @@ import re
|
|
14
14
|
import shutil
|
15
15
|
import subprocess
|
16
16
|
import sys
|
17
|
-
import time as t_
|
18
17
|
import uuid
|
19
|
-
import xml.etree.ElementTree as ET
|
20
18
|
from abc import ABC
|
21
19
|
from collections.abc import (
|
22
20
|
AsyncGenerator,
|
@@ -27,7 +25,6 @@ from collections.abc import (
|
|
27
25
|
)
|
28
26
|
from concurrent.futures import ThreadPoolExecutor
|
29
27
|
from datetime import datetime, timezone
|
30
|
-
from decimal import Decimal
|
31
28
|
from enum import Enum
|
32
29
|
from functools import lru_cache, partial
|
33
30
|
from inspect import isclass
|
@@ -43,11 +40,12 @@ from typing import (
|
|
43
40
|
)
|
44
41
|
|
45
42
|
import anyio
|
46
|
-
from pydantic import BaseModel
|
43
|
+
from pydantic import BaseModel
|
47
44
|
from pydantic_core import PydanticUndefinedType
|
48
45
|
|
49
46
|
from .libs.concurrency import Lock as ConcurrencyLock
|
50
47
|
from .libs.concurrency import Semaphore, create_task_group
|
48
|
+
from .libs.validate.xml_parser import xml_to_dict
|
51
49
|
from .settings import Settings
|
52
50
|
|
53
51
|
R = TypeVar("R")
|
@@ -89,7 +87,6 @@ __all__ = (
|
|
89
87
|
"throttle",
|
90
88
|
"max_concurrent",
|
91
89
|
"force_async",
|
92
|
-
"to_num",
|
93
90
|
"breakdown_pydantic_annotation",
|
94
91
|
"run_package_manager_command",
|
95
92
|
"StringEnum",
|
@@ -473,24 +470,6 @@ def to_list(
|
|
473
470
|
return processed
|
474
471
|
|
475
472
|
|
476
|
-
class ToListParams(Params):
|
477
|
-
flatten: bool = False
|
478
|
-
dropna: bool = False
|
479
|
-
unique: bool = False
|
480
|
-
use_values: bool = False
|
481
|
-
flatten_tuple_set: bool = False
|
482
|
-
|
483
|
-
def __call__(self, input_: Any):
|
484
|
-
return to_list(
|
485
|
-
input_,
|
486
|
-
flatten=self.flatten,
|
487
|
-
dropna=self.dropna,
|
488
|
-
unique=self.unique,
|
489
|
-
use_values=self.use_values,
|
490
|
-
flatten_tuple_set=self.flatten_tuple_set,
|
491
|
-
)
|
492
|
-
|
493
|
-
|
494
473
|
def lcall(
|
495
474
|
input_: Iterable[T] | T,
|
496
475
|
func: Callable[[T], R] | Iterable[Callable[[T], R]],
|
@@ -598,29 +577,6 @@ def lcall(
|
|
598
577
|
return out
|
599
578
|
|
600
579
|
|
601
|
-
class CallParams(Params):
|
602
|
-
"""params class for high order function with additional handling of lower order function parameters, can take arbitrary number of args and kwargs, args need to be in agrs=, kwargs can be passed as is"""
|
603
|
-
|
604
|
-
args: list = []
|
605
|
-
kwargs: dict = {}
|
606
|
-
|
607
|
-
@model_validator(mode="before")
|
608
|
-
def _validate_data(cls, data: dict):
|
609
|
-
_d = {}
|
610
|
-
for k in list(data.keys()):
|
611
|
-
if k in cls.keys():
|
612
|
-
_d[k] = data.pop(k)
|
613
|
-
_d.setdefault("args", [])
|
614
|
-
_d.setdefault("kwargs", {})
|
615
|
-
_d["kwargs"].update(data)
|
616
|
-
return _d
|
617
|
-
|
618
|
-
def __call__(self, *args, **kwargs):
|
619
|
-
raise NotImplementedError(
|
620
|
-
"This method should be implemented in a subclass"
|
621
|
-
)
|
622
|
-
|
623
|
-
|
624
580
|
async def alcall(
|
625
581
|
input_: list[Any],
|
626
582
|
func: Callable[..., T],
|
@@ -933,92 +889,6 @@ def create_path(
|
|
933
889
|
return full_path
|
934
890
|
|
935
891
|
|
936
|
-
class CreatePathParams(Params):
|
937
|
-
directory: Path | str
|
938
|
-
filename: str
|
939
|
-
extension: str = None
|
940
|
-
timestamp: bool = False
|
941
|
-
dir_exist_ok: bool = True
|
942
|
-
file_exist_ok: bool = False
|
943
|
-
time_prefix: bool = False
|
944
|
-
timestamp_format: str | None = None
|
945
|
-
random_hash_digits: int = 0
|
946
|
-
|
947
|
-
def __call__(
|
948
|
-
self, directory: Path | str = None, filename: str = None
|
949
|
-
) -> Path:
|
950
|
-
return create_path(
|
951
|
-
directory or self.directory,
|
952
|
-
filename or self.filename,
|
953
|
-
extension=self.extension,
|
954
|
-
timestamp=self.timestamp,
|
955
|
-
dir_exist_ok=self.dir_exist_ok,
|
956
|
-
file_exist_ok=self.file_exist_ok,
|
957
|
-
time_prefix=self.time_prefix,
|
958
|
-
timestamp_format=self.timestamp_format,
|
959
|
-
random_hash_digits=self.random_hash_digits,
|
960
|
-
)
|
961
|
-
|
962
|
-
|
963
|
-
# --- JSON and XML Conversion ---
|
964
|
-
|
965
|
-
|
966
|
-
def to_xml(
|
967
|
-
obj: dict | list | str | int | float | bool | None,
|
968
|
-
root_name: str = "root",
|
969
|
-
) -> str:
|
970
|
-
"""
|
971
|
-
Convert a dictionary into an XML formatted string.
|
972
|
-
|
973
|
-
Rules:
|
974
|
-
- A dictionary key becomes an XML tag.
|
975
|
-
- If the dictionary value is:
|
976
|
-
- A primitive type (str, int, float, bool, None): it becomes the text content of the tag.
|
977
|
-
- A list: each element of the list will repeat the same tag.
|
978
|
-
- Another dictionary: it is recursively converted to nested XML.
|
979
|
-
- root_name sets the top-level XML element name.
|
980
|
-
|
981
|
-
Args:
|
982
|
-
obj: The Python object to convert (typically a dictionary).
|
983
|
-
root_name: The name of the root XML element.
|
984
|
-
|
985
|
-
Returns:
|
986
|
-
A string representing the XML.
|
987
|
-
|
988
|
-
Examples:
|
989
|
-
>>> to_xml({"a": 1, "b": {"c": "hello", "d": [10, 20]}}, root_name="data")
|
990
|
-
'<data><a>1</a><b><c>hello</c><d>10</d><d>20</d></b></data>'
|
991
|
-
"""
|
992
|
-
|
993
|
-
def _convert(value: Any, tag_name: str) -> str:
|
994
|
-
# If value is a dict, recursively convert its keys
|
995
|
-
if isinstance(value, dict):
|
996
|
-
inner = "".join(_convert(v, k) for k, v in value.items())
|
997
|
-
return f"<{tag_name}>{inner}</{tag_name}>"
|
998
|
-
# If value is a list, repeat the same tag for each element
|
999
|
-
elif isinstance(value, list):
|
1000
|
-
return "".join(_convert(item, tag_name) for item in value)
|
1001
|
-
# If value is a primitive, convert to string and place inside tag
|
1002
|
-
else:
|
1003
|
-
text = "" if value is None else str(value)
|
1004
|
-
# Escape special XML characters if needed (minimal)
|
1005
|
-
text = (
|
1006
|
-
text.replace("&", "&amp;")
|
1007
|
-
.replace("<", "&lt;")
|
1008
|
-
.replace(">", "&gt;")
|
1009
|
-
.replace('"', "&quot;")
|
1010
|
-
.replace("'", "&apos;")
|
1011
|
-
)
|
1012
|
-
return f"<{tag_name}>{text}</{tag_name}>"
|
1013
|
-
|
1014
|
-
# If top-level obj is not a dict, wrap it in one
|
1015
|
-
if not isinstance(obj, dict):
|
1016
|
-
obj = {root_name: obj}
|
1017
|
-
|
1018
|
-
inner_xml = "".join(_convert(v, k) for k, v in obj.items())
|
1019
|
-
return f"<{root_name}>{inner_xml}</{root_name}>"
|
1020
|
-
|
1021
|
-
|
1022
892
|
def fuzzy_parse_json(
|
1023
893
|
str_to_parse: str, /
|
1024
894
|
) -> dict[str, Any] | list[dict[str, Any]]:
|
@@ -1132,148 +1002,6 @@ def fix_json_string(str_to_parse: str, /) -> str:
|
|
1132
1002
|
return str_to_parse
|
1133
1003
|
|
1134
1004
|
|
1135
|
-
class XMLParser:
|
1136
|
-
def __init__(self, xml_string: str):
|
1137
|
-
self.xml_string = xml_string.strip()
|
1138
|
-
self.index = 0
|
1139
|
-
|
1140
|
-
def parse(self) -> dict[str, Any]:
|
1141
|
-
"""Parse the XML string and return the root element as a dictionary."""
|
1142
|
-
return self._parse_element()
|
1143
|
-
|
1144
|
-
def _parse_element(self) -> dict[str, Any]:
|
1145
|
-
"""Parse a single XML element and its children."""
|
1146
|
-
self._skip_whitespace()
|
1147
|
-
if self.xml_string[self.index] != "<":
|
1148
|
-
raise ValueError(
|
1149
|
-
f"Expected '<', found '{self.xml_string[self.index]}'"
|
1150
|
-
)
|
1151
|
-
|
1152
|
-
tag, attributes = self._parse_opening_tag()
|
1153
|
-
children: dict[str, str | list | dict] = {}
|
1154
|
-
text = ""
|
1155
|
-
|
1156
|
-
while self.index < len(self.xml_string):
|
1157
|
-
self._skip_whitespace()
|
1158
|
-
if self.xml_string.startswith("</", self.index):
|
1159
|
-
closing_tag = self._parse_closing_tag()
|
1160
|
-
if closing_tag != tag:
|
1161
|
-
raise ValueError(
|
1162
|
-
f"Mismatched tags: '{tag}' and '{closing_tag}'"
|
1163
|
-
)
|
1164
|
-
break
|
1165
|
-
elif self.xml_string.startswith("<", self.index):
|
1166
|
-
child = self._parse_element()
|
1167
|
-
child_tag, child_data = next(iter(child.items()))
|
1168
|
-
if child_tag in children:
|
1169
|
-
if not isinstance(children[child_tag], list):
|
1170
|
-
children[child_tag] = [children[child_tag]]
|
1171
|
-
children[child_tag].append(child_data)
|
1172
|
-
else:
|
1173
|
-
children[child_tag] = child_data
|
1174
|
-
else:
|
1175
|
-
text += self._parse_text()
|
1176
|
-
|
1177
|
-
result: dict[str, Any] = {}
|
1178
|
-
if attributes:
|
1179
|
-
result["@attributes"] = attributes
|
1180
|
-
if children:
|
1181
|
-
result.update(children)
|
1182
|
-
elif text.strip():
|
1183
|
-
result = text.strip()
|
1184
|
-
|
1185
|
-
return {tag: result}
|
1186
|
-
|
1187
|
-
def _parse_opening_tag(self) -> tuple[str, dict[str, str]]:
|
1188
|
-
"""Parse an opening XML tag and its attributes."""
|
1189
|
-
match = re.match(
|
1190
|
-
r'<(\w+)((?:\s+\w+="[^"]*")*)\s*/?>',
|
1191
|
-
self.xml_string[self.index :], # noqa
|
1192
|
-
)
|
1193
|
-
if not match:
|
1194
|
-
raise ValueError("Invalid opening tag")
|
1195
|
-
self.index += match.end()
|
1196
|
-
tag = match.group(1)
|
1197
|
-
attributes = dict(re.findall(r'(\w+)="([^"]*)"', match.group(2)))
|
1198
|
-
return tag, attributes
|
1199
|
-
|
1200
|
-
def _parse_closing_tag(self) -> str:
|
1201
|
-
"""Parse a closing XML tag."""
|
1202
|
-
match = re.match(r"</(\w+)>", self.xml_string[self.index :]) # noqa
|
1203
|
-
if not match:
|
1204
|
-
raise ValueError("Invalid closing tag")
|
1205
|
-
self.index += match.end()
|
1206
|
-
return match.group(1)
|
1207
|
-
|
1208
|
-
def _parse_text(self) -> str:
|
1209
|
-
"""Parse text content between XML tags."""
|
1210
|
-
start = self.index
|
1211
|
-
while (
|
1212
|
-
self.index < len(self.xml_string)
|
1213
|
-
and self.xml_string[self.index] != "<"
|
1214
|
-
):
|
1215
|
-
self.index += 1
|
1216
|
-
return self.xml_string[start : self.index] # noqa
|
1217
|
-
|
1218
|
-
def _skip_whitespace(self) -> None:
|
1219
|
-
"""Skip any whitespace characters at the current parsing position."""
|
1220
|
-
p_ = len(self.xml_string[self.index :]) # noqa
|
1221
|
-
m_ = len(self.xml_string[self.index :].lstrip()) # noqa
|
1222
|
-
|
1223
|
-
self.index += p_ - m_
|
1224
|
-
|
1225
|
-
|
1226
|
-
def xml_to_dict(
|
1227
|
-
xml_string: str,
|
1228
|
-
/,
|
1229
|
-
suppress=False,
|
1230
|
-
remove_root: bool = True,
|
1231
|
-
root_tag: str = None,
|
1232
|
-
) -> dict[str, Any]:
|
1233
|
-
"""
|
1234
|
-
Parse an XML string into a nested dictionary structure.
|
1235
|
-
|
1236
|
-
This function converts an XML string into a dictionary where:
|
1237
|
-
- Element tags become dictionary keys
|
1238
|
-
- Text content is assigned directly to the tag key if there are no children
|
1239
|
-
- Attributes are stored in a '@attributes' key
|
1240
|
-
- Multiple child elements with the same tag are stored as lists
|
1241
|
-
|
1242
|
-
Args:
|
1243
|
-
xml_string: The XML string to parse.
|
1244
|
-
|
1245
|
-
Returns:
|
1246
|
-
A dictionary representation of the XML structure.
|
1247
|
-
|
1248
|
-
Raises:
|
1249
|
-
ValueError: If the XML is malformed or parsing fails.
|
1250
|
-
"""
|
1251
|
-
try:
|
1252
|
-
a = XMLParser(xml_string).parse()
|
1253
|
-
if remove_root and (root_tag or "root") in a:
|
1254
|
-
a = a[root_tag or "root"]
|
1255
|
-
return a
|
1256
|
-
except ValueError as e:
|
1257
|
-
if not suppress:
|
1258
|
-
raise e
|
1259
|
-
|
1260
|
-
|
1261
|
-
def dict_to_xml(data: dict, /, root_tag: str = "root") -> str:
|
1262
|
-
root = ET.Element(root_tag)
|
1263
|
-
|
1264
|
-
def convert(dict_obj: dict, parent: Any) -> None:
|
1265
|
-
for key, val in dict_obj.items():
|
1266
|
-
if isinstance(val, dict):
|
1267
|
-
element = ET.SubElement(parent, key)
|
1268
|
-
convert(dict_obj=val, parent=element)
|
1269
|
-
else:
|
1270
|
-
element = ET.SubElement(parent, key)
|
1271
|
-
element.text = str(object=val)
|
1272
|
-
|
1273
|
-
convert(dict_obj=data, parent=root)
|
1274
|
-
return ET.tostring(root, encoding="unicode")
|
1275
|
-
|
1276
|
-
|
1277
1005
|
def to_dict(
|
1278
1006
|
input_: Any,
|
1279
1007
|
/,
|
@@ -1677,74 +1405,6 @@ def get_bins(input_: list[str], upper: int) -> list[list[int]]:
|
|
1677
1405
|
return bins
|
1678
1406
|
|
1679
1407
|
|
1680
|
-
class Throttle:
|
1681
|
-
"""
|
1682
|
-
Provide a throttling mechanism for function calls.
|
1683
|
-
|
1684
|
-
When used as a decorator, it ensures that the decorated function can only
|
1685
|
-
be called once per specified period. Subsequent calls within this period
|
1686
|
-
are delayed to enforce this constraint.
|
1687
|
-
|
1688
|
-
Attributes:
|
1689
|
-
period: The minimum time period (in seconds) between successive calls.
|
1690
|
-
"""
|
1691
|
-
|
1692
|
-
def __init__(self, period: float) -> None:
|
1693
|
-
"""
|
1694
|
-
Initialize a new instance of Throttle.
|
1695
|
-
|
1696
|
-
Args:
|
1697
|
-
period: The minimum time period (in seconds) between
|
1698
|
-
successive calls.
|
1699
|
-
"""
|
1700
|
-
self.period = period
|
1701
|
-
self.last_called = 0
|
1702
|
-
|
1703
|
-
def __call__(self, func: Callable[..., T]) -> Callable[..., T]:
|
1704
|
-
"""
|
1705
|
-
Decorate a synchronous function with the throttling mechanism.
|
1706
|
-
|
1707
|
-
Args:
|
1708
|
-
func: The synchronous function to be throttled.
|
1709
|
-
|
1710
|
-
Returns:
|
1711
|
-
The throttled synchronous function.
|
1712
|
-
"""
|
1713
|
-
|
1714
|
-
@functools.wraps(func)
|
1715
|
-
def wrapper(*args, **kwargs) -> Any:
|
1716
|
-
elapsed = time() - self.last_called
|
1717
|
-
if elapsed < self.period:
|
1718
|
-
t_.sleep(self.period - elapsed)
|
1719
|
-
self.last_called = time()
|
1720
|
-
return func(*args, **kwargs)
|
1721
|
-
|
1722
|
-
return wrapper
|
1723
|
-
|
1724
|
-
def __call_async__(
|
1725
|
-
self, func: Callable[..., Callable[..., T]]
|
1726
|
-
) -> Callable[..., Callable[..., T]]:
|
1727
|
-
"""
|
1728
|
-
Decorate an asynchronous function with the throttling mechanism.
|
1729
|
-
|
1730
|
-
Args:
|
1731
|
-
func: The asynchronous function to be throttled.
|
1732
|
-
|
1733
|
-
Returns:
|
1734
|
-
The throttled asynchronous function.
|
1735
|
-
"""
|
1736
|
-
|
1737
|
-
@functools.wraps(func)
|
1738
|
-
async def wrapper(*args, **kwargs) -> Any:
|
1739
|
-
elapsed = time() - self.last_called
|
1740
|
-
if elapsed < self.period:
|
1741
|
-
await asyncio.sleep(self.period - elapsed)
|
1742
|
-
self.last_called = time()
|
1743
|
-
return await func(*args, **kwargs)
|
1744
|
-
|
1745
|
-
return wrapper
|
1746
|
-
|
1747
|
-
|
1748
1408
|
def force_async(fn: Callable[..., T]) -> Callable[..., Callable[..., T]]:
|
1749
1409
|
"""
|
1750
1410
|
Convert a synchronous function to an asynchronous function
|
@@ -1779,6 +1439,8 @@ def throttle(
|
|
1779
1439
|
Returns:
|
1780
1440
|
The throttled function.
|
1781
1441
|
"""
|
1442
|
+
from lionagi.libs.concurrency.throttle import Throttle
|
1443
|
+
|
1782
1444
|
if not is_coro_func(func):
|
1783
1445
|
func = force_async(func)
|
1784
1446
|
throttle_instance = Throttle(period)
|
@@ -1816,380 +1478,6 @@ def max_concurrent(
|
|
1816
1478
|
return wrapper
|
1817
1479
|
|
1818
1480
|
|
1819
|
-
# Type definitions
|
1820
|
-
NUM_TYPE_LITERAL = Literal["int", "float", "complex"]
|
1821
|
-
NUM_TYPES = type[int] | type[float] | type[complex] | NUM_TYPE_LITERAL
|
1822
|
-
NumericType = TypeVar("NumericType", int, float, complex)
|
1823
|
-
|
1824
|
-
# Type mapping
|
1825
|
-
TYPE_MAP = {"int": int, "float": float, "complex": complex}
|
1826
|
-
|
1827
|
-
# Regex patterns for different numeric formats
|
1828
|
-
PATTERNS = {
|
1829
|
-
"scientific": r"[-+]?(?:\d*\.)?\d+[eE][-+]?\d+",
|
1830
|
-
"complex_sci": r"[-+]?(?:\d*\.)?\d+(?:[eE][-+]?\d+)?[-+](?:\d*\.)?\d+(?:[eE][-+]?\d+)?[jJ]",
|
1831
|
-
"complex": r"[-+]?(?:\d*\.)?\d+[-+](?:\d*\.)?\d+[jJ]",
|
1832
|
-
"pure_imaginary": r"[-+]?(?:\d*\.)?\d*[jJ]",
|
1833
|
-
"percentage": r"[-+]?(?:\d*\.)?\d+%",
|
1834
|
-
"fraction": r"[-+]?\d+/\d+",
|
1835
|
-
"decimal": r"[-+]?(?:\d*\.)?\d+",
|
1836
|
-
"special": r"[-+]?(?:inf|infinity|nan)",
|
1837
|
-
}
|
1838
|
-
|
1839
|
-
|
1840
|
-
def to_num(
|
1841
|
-
input_: Any,
|
1842
|
-
/,
|
1843
|
-
*,
|
1844
|
-
upper_bound: int | float | None = None,
|
1845
|
-
lower_bound: int | float | None = None,
|
1846
|
-
num_type: NUM_TYPES = float,
|
1847
|
-
precision: int | None = None,
|
1848
|
-
num_count: int = 1,
|
1849
|
-
) -> int | float | complex | list[int | float | complex]:
|
1850
|
-
"""Convert input to numeric type(s) with validation and bounds checking.
|
1851
|
-
|
1852
|
-
Args:
|
1853
|
-
input_value: The input to convert to number(s).
|
1854
|
-
upper_bound: Maximum allowed value (inclusive).
|
1855
|
-
lower_bound: Minimum allowed value (inclusive).
|
1856
|
-
num_type: Target numeric type ('int', 'float', 'complex' or type objects).
|
1857
|
-
precision: Number of decimal places for rounding (float only).
|
1858
|
-
num_count: Number of numeric values to extract.
|
1859
|
-
|
1860
|
-
Returns:
|
1861
|
-
Converted number(s). Single value if num_count=1, else list.
|
1862
|
-
|
1863
|
-
Raises:
|
1864
|
-
ValueError: For invalid input or out of bounds values.
|
1865
|
-
TypeError: For invalid input types or invalid type conversions.
|
1866
|
-
"""
|
1867
|
-
# Validate input
|
1868
|
-
if isinstance(input_, (list, tuple)):
|
1869
|
-
raise TypeError("Input cannot be a sequence")
|
1870
|
-
|
1871
|
-
# Handle boolean input
|
1872
|
-
if isinstance(input_, bool):
|
1873
|
-
return validate_num_type(num_type)(input_)
|
1874
|
-
|
1875
|
-
# Handle direct numeric input
|
1876
|
-
if isinstance(input_, (int, float, complex, Decimal)):
|
1877
|
-
inferred_type = type(input_)
|
1878
|
-
if isinstance(input_, Decimal):
|
1879
|
-
inferred_type = float
|
1880
|
-
value = float(input_) if not isinstance(input_, complex) else input_
|
1881
|
-
value = apply_bounds(value, upper_bound, lower_bound)
|
1882
|
-
value = apply_precision(value, precision)
|
1883
|
-
return convert_type(value, validate_num_type(num_type), inferred_type)
|
1884
|
-
|
1885
|
-
# Convert input to string and extract numbers
|
1886
|
-
input_str = str(input_)
|
1887
|
-
number_matches = extract_numbers(input_str)
|
1888
|
-
|
1889
|
-
if not number_matches:
|
1890
|
-
raise ValueError(f"No valid numbers found in: {input_str}")
|
1891
|
-
|
1892
|
-
# Process numbers
|
1893
|
-
results = []
|
1894
|
-
target_type = validate_num_type(num_type)
|
1895
|
-
|
1896
|
-
number_matches = (
|
1897
|
-
number_matches[:num_count]
|
1898
|
-
if num_count < len(number_matches)
|
1899
|
-
else number_matches
|
1900
|
-
)
|
1901
|
-
|
1902
|
-
for type_and_value in number_matches:
|
1903
|
-
try:
|
1904
|
-
# Infer appropriate type
|
1905
|
-
inferred_type = infer_type(type_and_value)
|
1906
|
-
|
1907
|
-
# Parse to numeric value
|
1908
|
-
value = parse_number(type_and_value)
|
1909
|
-
|
1910
|
-
# Apply bounds if not complex
|
1911
|
-
value = apply_bounds(value, upper_bound, lower_bound)
|
1912
|
-
|
1913
|
-
# Apply precision
|
1914
|
-
value = apply_precision(value, precision)
|
1915
|
-
|
1916
|
-
# Convert to target type if different from inferred
|
1917
|
-
value = convert_type(value, target_type, inferred_type)
|
1918
|
-
|
1919
|
-
results.append(value)
|
1920
|
-
|
1921
|
-
except Exception as e:
|
1922
|
-
if len(type_and_value) == 2:
|
1923
|
-
raise type(e)(
|
1924
|
-
f"Error processing {type_and_value[1]}: {str(e)}"
|
1925
|
-
)
|
1926
|
-
raise type(e)(f"Error processing {type_and_value}: {str(e)}")
|
1927
|
-
|
1928
|
-
if results and num_count == 1:
|
1929
|
-
return results[0]
|
1930
|
-
return results
|
1931
|
-
|
1932
|
-
|
1933
|
-
def extract_numbers(text: str) -> list[tuple[str, str]]:
|
1934
|
-
"""Extract numeric values from text using ordered regex patterns.
|
1935
|
-
|
1936
|
-
Args:
|
1937
|
-
text: The text to extract numbers from.
|
1938
|
-
|
1939
|
-
Returns:
|
1940
|
-
List of tuples containing (pattern_type, matched_value).
|
1941
|
-
"""
|
1942
|
-
combined_pattern = "|".join(PATTERNS.values())
|
1943
|
-
matches = re.finditer(combined_pattern, text, re.IGNORECASE)
|
1944
|
-
numbers = []
|
1945
|
-
|
1946
|
-
for match in matches:
|
1947
|
-
value = match.group()
|
1948
|
-
# Check which pattern matched
|
1949
|
-
for pattern_name, pattern in PATTERNS.items():
|
1950
|
-
if re.fullmatch(pattern, value, re.IGNORECASE):
|
1951
|
-
numbers.append((pattern_name, value))
|
1952
|
-
break
|
1953
|
-
|
1954
|
-
return numbers
|
1955
|
-
|
1956
|
-
|
1957
|
-
def validate_num_type(num_type: NUM_TYPES) -> type:
|
1958
|
-
"""Validate and normalize numeric type specification.
|
1959
|
-
|
1960
|
-
Args:
|
1961
|
-
num_type: The numeric type to validate.
|
1962
|
-
|
1963
|
-
Returns:
|
1964
|
-
The normalized Python type object.
|
1965
|
-
|
1966
|
-
Raises:
|
1967
|
-
ValueError: If the type specification is invalid.
|
1968
|
-
"""
|
1969
|
-
if isinstance(num_type, str):
|
1970
|
-
if num_type not in TYPE_MAP:
|
1971
|
-
raise ValueError(f"Invalid number type: {num_type}")
|
1972
|
-
return TYPE_MAP[num_type]
|
1973
|
-
|
1974
|
-
if num_type not in (int, float, complex):
|
1975
|
-
raise ValueError(f"Invalid number type: {num_type}")
|
1976
|
-
return num_type
|
1977
|
-
|
1978
|
-
|
1979
|
-
def infer_type(value: tuple[str, str]) -> type:
|
1980
|
-
"""Infer appropriate numeric type from value.
|
1981
|
-
|
1982
|
-
Args:
|
1983
|
-
value: Tuple of (pattern_type, matched_value).
|
1984
|
-
|
1985
|
-
Returns:
|
1986
|
-
The inferred Python type.
|
1987
|
-
"""
|
1988
|
-
pattern_type, _ = value
|
1989
|
-
if pattern_type in ("complex", "complex_sci", "pure_imaginary"):
|
1990
|
-
return complex
|
1991
|
-
return float
|
1992
|
-
|
1993
|
-
|
1994
|
-
def convert_special(value: str) -> float:
|
1995
|
-
"""Convert special float values (inf, -inf, nan).
|
1996
|
-
|
1997
|
-
Args:
|
1998
|
-
value: The string value to convert.
|
1999
|
-
|
2000
|
-
Returns:
|
2001
|
-
The converted float value.
|
2002
|
-
"""
|
2003
|
-
value = value.lower()
|
2004
|
-
if "infinity" in value or "inf" in value:
|
2005
|
-
return float("-inf") if value.startswith("-") else float("inf")
|
2006
|
-
return float("nan")
|
2007
|
-
|
2008
|
-
|
2009
|
-
def convert_percentage(value: str) -> float:
|
2010
|
-
"""Convert percentage string to float.
|
2011
|
-
|
2012
|
-
Args:
|
2013
|
-
value: The percentage string to convert.
|
2014
|
-
|
2015
|
-
Returns:
|
2016
|
-
The converted float value.
|
2017
|
-
|
2018
|
-
Raises:
|
2019
|
-
ValueError: If the percentage value is invalid.
|
2020
|
-
"""
|
2021
|
-
try:
|
2022
|
-
return float(value.rstrip("%")) / 100
|
2023
|
-
except ValueError as e:
|
2024
|
-
raise ValueError(f"Invalid percentage value: {value}") from e
|
2025
|
-
|
2026
|
-
|
2027
|
-
def convert_complex(value: str) -> complex:
|
2028
|
-
"""Convert complex number string to complex.
|
2029
|
-
|
2030
|
-
Args:
|
2031
|
-
value: The complex number string to convert.
|
2032
|
-
|
2033
|
-
Returns:
|
2034
|
-
The converted complex value.
|
2035
|
-
|
2036
|
-
Raises:
|
2037
|
-
ValueError: If the complex number is invalid.
|
2038
|
-
"""
|
2039
|
-
try:
|
2040
|
-
# Handle pure imaginary numbers
|
2041
|
-
if value.endswith("j") or value.endswith("J"):
|
2042
|
-
if value in ("j", "J"):
|
2043
|
-
return complex(0, 1)
|
2044
|
-
if value in ("+j", "+J"):
|
2045
|
-
return complex(0, 1)
|
2046
|
-
if value in ("-j", "-J"):
|
2047
|
-
return complex(0, -1)
|
2048
|
-
if "+" not in value and "-" not in value[1:]:
|
2049
|
-
# Pure imaginary number
|
2050
|
-
imag = float(value[:-1] or "1")
|
2051
|
-
return complex(0, imag)
|
2052
|
-
|
2053
|
-
return complex(value.replace(" ", ""))
|
2054
|
-
except ValueError as e:
|
2055
|
-
raise ValueError(f"Invalid complex number: {value}") from e
|
2056
|
-
|
2057
|
-
|
2058
|
-
def convert_type(
|
2059
|
-
value: float | complex,
|
2060
|
-
target_type: type,
|
2061
|
-
inferred_type: type,
|
2062
|
-
) -> int | float | complex:
|
2063
|
-
"""Convert value to target type if specified, otherwise use inferred type.
|
2064
|
-
|
2065
|
-
Args:
|
2066
|
-
value: The value to convert.
|
2067
|
-
target_type: The requested target type.
|
2068
|
-
inferred_type: The inferred type from the value.
|
2069
|
-
|
2070
|
-
Returns:
|
2071
|
-
The converted value.
|
2072
|
-
|
2073
|
-
Raises:
|
2074
|
-
TypeError: If the conversion is not possible.
|
2075
|
-
"""
|
2076
|
-
try:
|
2077
|
-
# If no specific type requested, use inferred type
|
2078
|
-
if target_type is float and inferred_type is complex:
|
2079
|
-
return value
|
2080
|
-
|
2081
|
-
# Handle explicit type conversions
|
2082
|
-
if target_type is int and isinstance(value, complex):
|
2083
|
-
raise TypeError("Cannot convert complex number to int")
|
2084
|
-
return target_type(value)
|
2085
|
-
except (ValueError, TypeError) as e:
|
2086
|
-
raise TypeError(
|
2087
|
-
f"Cannot convert {value} to {target_type.__name__}"
|
2088
|
-
) from e
|
2089
|
-
|
2090
|
-
|
2091
|
-
def apply_bounds(
|
2092
|
-
value: float | complex,
|
2093
|
-
upper_bound: float | None = None,
|
2094
|
-
lower_bound: float | None = None,
|
2095
|
-
) -> float | complex:
|
2096
|
-
"""Apply bounds checking to numeric value.
|
2097
|
-
|
2098
|
-
Args:
|
2099
|
-
value: The value to check.
|
2100
|
-
upper_bound: Maximum allowed value (inclusive).
|
2101
|
-
lower_bound: Minimum allowed value (inclusive).
|
2102
|
-
|
2103
|
-
Returns:
|
2104
|
-
The validated value.
|
2105
|
-
|
2106
|
-
Raises:
|
2107
|
-
ValueError: If the value is outside bounds.
|
2108
|
-
"""
|
2109
|
-
if isinstance(value, complex):
|
2110
|
-
return value
|
2111
|
-
|
2112
|
-
if upper_bound is not None and value > upper_bound:
|
2113
|
-
raise ValueError(f"Value {value} exceeds upper bound {upper_bound}")
|
2114
|
-
if lower_bound is not None and value < lower_bound:
|
2115
|
-
raise ValueError(f"Value {value} below lower bound {lower_bound}")
|
2116
|
-
return value
|
2117
|
-
|
2118
|
-
|
2119
|
-
def apply_precision(
|
2120
|
-
value: float | complex,
|
2121
|
-
precision: int | None,
|
2122
|
-
) -> float | complex:
|
2123
|
-
"""Apply precision rounding to numeric value.
|
2124
|
-
|
2125
|
-
Args:
|
2126
|
-
value: The value to round.
|
2127
|
-
precision: Number of decimal places.
|
2128
|
-
|
2129
|
-
Returns:
|
2130
|
-
The rounded value.
|
2131
|
-
"""
|
2132
|
-
if precision is None or isinstance(value, complex):
|
2133
|
-
return value
|
2134
|
-
if isinstance(value, float):
|
2135
|
-
return round(value, precision)
|
2136
|
-
return value
|
2137
|
-
|
2138
|
-
|
2139
|
-
def parse_number(type_and_value: tuple[str, str]) -> float | complex:
|
2140
|
-
"""Parse string to numeric value based on pattern type.
|
2141
|
-
|
2142
|
-
Args:
|
2143
|
-
type_and_value: Tuple of (pattern_type, matched_value).
|
2144
|
-
|
2145
|
-
Returns:
|
2146
|
-
The parsed numeric value.
|
2147
|
-
|
2148
|
-
Raises:
|
2149
|
-
ValueError: If parsing fails.
|
2150
|
-
"""
|
2151
|
-
num_type, value = type_and_value
|
2152
|
-
value = value.strip()
|
2153
|
-
|
2154
|
-
try:
|
2155
|
-
if num_type == "special":
|
2156
|
-
return convert_special(value)
|
2157
|
-
|
2158
|
-
if num_type == "percentage":
|
2159
|
-
return convert_percentage(value)
|
2160
|
-
|
2161
|
-
if num_type == "fraction":
|
2162
|
-
if "/" not in value:
|
2163
|
-
raise ValueError(f"Invalid fraction: {value}")
|
2164
|
-
if value.count("/") > 1:
|
2165
|
-
raise ValueError(f"Invalid fraction: {value}")
|
2166
|
-
num, denom = value.split("/")
|
2167
|
-
if not (num.strip("-").isdigit() and denom.isdigit()):
|
2168
|
-
raise ValueError(f"Invalid fraction: {value}")
|
2169
|
-
denom_val = float(denom)
|
2170
|
-
if denom_val == 0:
|
2171
|
-
raise ValueError("Division by zero")
|
2172
|
-
return float(num) / denom_val
|
2173
|
-
if num_type in ("complex", "complex_sci", "pure_imaginary"):
|
2174
|
-
return convert_complex(value)
|
2175
|
-
if num_type == "scientific":
|
2176
|
-
if "e" not in value.lower():
|
2177
|
-
raise ValueError(f"Invalid scientific notation: {value}")
|
2178
|
-
parts = value.lower().split("e")
|
2179
|
-
if len(parts) != 2:
|
2180
|
-
raise ValueError(f"Invalid scientific notation: {value}")
|
2181
|
-
if not (parts[1].lstrip("+-").isdigit()):
|
2182
|
-
raise ValueError(f"Invalid scientific notation: {value}")
|
2183
|
-
return float(value)
|
2184
|
-
if num_type == "decimal":
|
2185
|
-
return float(value)
|
2186
|
-
|
2187
|
-
raise ValueError(f"Unknown number type: {num_type}")
|
2188
|
-
except Exception as e:
|
2189
|
-
# Preserve the specific error type but wrap with more context
|
2190
|
-
raise type(e)(f"Failed to parse {value} as {num_type}: {str(e)}")
|
2191
|
-
|
2192
|
-
|
2193
1481
|
def breakdown_pydantic_annotation(
|
2194
1482
|
model: type[B], max_depth: int | None = None, current_depth: int = 0
|
2195
1483
|
) -> dict[str, Any]:
|
@@ -2409,60 +1697,3 @@ def is_import_installed(package_name: str) -> bool:
|
|
2409
1697
|
bool: True if the package is installed, False otherwise.
|
2410
1698
|
"""
|
2411
1699
|
return importlib.util.find_spec(package_name) is not None
|
2412
|
-
|
2413
|
-
|
2414
|
-
def read_image_to_base64(image_path: str | Path) -> str:
|
2415
|
-
import base64
|
2416
|
-
|
2417
|
-
import cv2
|
2418
|
-
|
2419
|
-
image_path = str(image_path)
|
2420
|
-
image = cv2.imread(image_path, cv2.COLOR_BGR2RGB)
|
2421
|
-
|
2422
|
-
if image is None:
|
2423
|
-
raise ValueError(f"Could not read image from path: {image_path}")
|
2424
|
-
|
2425
|
-
file_extension = "." + image_path.split(".")[-1]
|
2426
|
-
|
2427
|
-
success, buffer = cv2.imencode(file_extension, image)
|
2428
|
-
if not success:
|
2429
|
-
raise ValueError(f"Could not encode image to {file_extension} format.")
|
2430
|
-
encoded_image = base64.b64encode(buffer).decode("utf-8")
|
2431
|
-
return encoded_image
|
2432
|
-
|
2433
|
-
|
2434
|
-
def pdf_to_images(
|
2435
|
-
pdf_path: str, output_folder: str, dpi: int = 300, fmt: str = "jpeg"
|
2436
|
-
) -> list:
|
2437
|
-
"""
|
2438
|
-
Convert a PDF file into images, one image per page.
|
2439
|
-
|
2440
|
-
Args:
|
2441
|
-
pdf_path (str): Path to the input PDF file.
|
2442
|
-
output_folder (str): Directory to save the output images.
|
2443
|
-
dpi (int): Dots per inch (resolution) for conversion (default: 300).
|
2444
|
-
fmt (str): Image format (default: 'jpeg'). Use 'png' if preferred.
|
2445
|
-
|
2446
|
-
Returns:
|
2447
|
-
list: A list of file paths for the saved images.
|
2448
|
-
"""
|
2449
|
-
import os
|
2450
|
-
|
2451
|
-
convert_from_path = check_import(
|
2452
|
-
"pdf2image", import_name="convert_from_path"
|
2453
|
-
)
|
2454
|
-
|
2455
|
-
# Ensure the output folder exists
|
2456
|
-
os.makedirs(output_folder, exist_ok=True)
|
2457
|
-
|
2458
|
-
# Convert PDF to a list of PIL Image objects
|
2459
|
-
images = convert_from_path(pdf_path, dpi=dpi)
|
2460
|
-
|
2461
|
-
saved_paths = []
|
2462
|
-
for i, image in enumerate(images):
|
2463
|
-
# Construct the output file name
|
2464
|
-
image_file = os.path.join(output_folder, f"page_{i + 1}.{fmt}")
|
2465
|
-
image.save(image_file, fmt.upper())
|
2466
|
-
saved_paths.append(image_file)
|
2467
|
-
|
2468
|
-
return saved_paths
|