lionagi 0.14.8__py3-none-any.whl → 0.14.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/_errors.py +120 -11
- lionagi/_types.py +0 -6
- lionagi/config.py +3 -1
- lionagi/fields/reason.py +1 -1
- lionagi/libs/concurrency/throttle.py +79 -0
- lionagi/libs/parse.py +2 -1
- lionagi/libs/unstructured/__init__.py +0 -0
- lionagi/libs/unstructured/pdf_to_image.py +45 -0
- lionagi/libs/unstructured/read_image_to_base64.py +33 -0
- lionagi/libs/validate/to_num.py +378 -0
- lionagi/libs/validate/xml_parser.py +203 -0
- lionagi/models/operable_model.py +8 -3
- lionagi/operations/flow.py +0 -1
- lionagi/protocols/generic/event.py +2 -0
- lionagi/protocols/generic/log.py +26 -10
- lionagi/protocols/operatives/step.py +1 -1
- lionagi/protocols/types.py +9 -1
- lionagi/service/__init__.py +22 -1
- lionagi/service/connections/api_calling.py +57 -2
- lionagi/service/connections/endpoint_config.py +1 -1
- lionagi/service/connections/header_factory.py +4 -2
- lionagi/service/connections/match_endpoint.py +10 -10
- lionagi/service/connections/providers/anthropic_.py +5 -2
- lionagi/service/connections/providers/claude_code_.py +13 -17
- lionagi/service/connections/providers/claude_code_cli.py +51 -16
- lionagi/service/connections/providers/exa_.py +5 -3
- lionagi/service/connections/providers/oai_.py +116 -81
- lionagi/service/connections/providers/ollama_.py +38 -18
- lionagi/service/connections/providers/perplexity_.py +36 -14
- lionagi/service/connections/providers/types.py +30 -0
- lionagi/service/hooks/__init__.py +25 -0
- lionagi/service/hooks/_types.py +52 -0
- lionagi/service/hooks/_utils.py +85 -0
- lionagi/service/hooks/hook_event.py +67 -0
- lionagi/service/hooks/hook_registry.py +221 -0
- lionagi/service/imodel.py +120 -34
- lionagi/service/third_party/claude_code.py +715 -0
- lionagi/service/third_party/openai_model_names.py +198 -0
- lionagi/service/third_party/pplx_models.py +16 -8
- lionagi/service/types.py +21 -0
- lionagi/session/branch.py +1 -4
- lionagi/tools/base.py +1 -3
- lionagi/tools/file/reader.py +1 -1
- lionagi/tools/memory/tools.py +2 -2
- lionagi/utils.py +12 -775
- lionagi/version.py +1 -1
- {lionagi-0.14.8.dist-info → lionagi-0.14.10.dist-info}/METADATA +6 -2
- {lionagi-0.14.8.dist-info → lionagi-0.14.10.dist-info}/RECORD +50 -40
- lionagi/service/connections/providers/_claude_code/__init__.py +0 -3
- lionagi/service/connections/providers/_claude_code/models.py +0 -244
- lionagi/service/connections/providers/_claude_code/stream_cli.py +0 -359
- lionagi/service/third_party/openai_models.py +0 -18241
- {lionagi-0.14.8.dist-info → lionagi-0.14.10.dist-info}/WHEEL +0 -0
- {lionagi-0.14.8.dist-info → lionagi-0.14.10.dist-info}/licenses/LICENSE +0 -0
lionagi/utils.py
CHANGED
@@ -7,7 +7,6 @@ import contextlib
|
|
7
7
|
import copy as _copy
|
8
8
|
import dataclasses
|
9
9
|
import functools
|
10
|
-
import importlib.metadata
|
11
10
|
import importlib.util
|
12
11
|
import json
|
13
12
|
import logging
|
@@ -15,9 +14,7 @@ import re
|
|
15
14
|
import shutil
|
16
15
|
import subprocess
|
17
16
|
import sys
|
18
|
-
import time as t_
|
19
17
|
import uuid
|
20
|
-
import xml.etree.ElementTree as ET
|
21
18
|
from abc import ABC
|
22
19
|
from collections.abc import (
|
23
20
|
AsyncGenerator,
|
@@ -28,7 +25,6 @@ from collections.abc import (
|
|
28
25
|
)
|
29
26
|
from concurrent.futures import ThreadPoolExecutor
|
30
27
|
from datetime import datetime, timezone
|
31
|
-
from decimal import Decimal
|
32
28
|
from enum import Enum
|
33
29
|
from functools import lru_cache, partial
|
34
30
|
from inspect import isclass
|
@@ -44,11 +40,12 @@ from typing import (
|
|
44
40
|
)
|
45
41
|
|
46
42
|
import anyio
|
47
|
-
from pydantic import BaseModel
|
43
|
+
from pydantic import BaseModel
|
48
44
|
from pydantic_core import PydanticUndefinedType
|
49
45
|
|
50
46
|
from .libs.concurrency import Lock as ConcurrencyLock
|
51
47
|
from .libs.concurrency import Semaphore, create_task_group
|
48
|
+
from .libs.validate.xml_parser import xml_to_dict
|
52
49
|
from .settings import Settings
|
53
50
|
|
54
51
|
R = TypeVar("R")
|
@@ -90,15 +87,21 @@ __all__ = (
|
|
90
87
|
"throttle",
|
91
88
|
"max_concurrent",
|
92
89
|
"force_async",
|
93
|
-
"to_num",
|
94
90
|
"breakdown_pydantic_annotation",
|
95
91
|
"run_package_manager_command",
|
92
|
+
"StringEnum",
|
96
93
|
)
|
97
94
|
|
98
95
|
|
99
96
|
# --- General Global Utilities Types ---
|
100
97
|
|
101
98
|
|
99
|
+
class StringEnum(str, Enum):
|
100
|
+
@classmethod
|
101
|
+
def allowed(cls) -> tuple[str, ...]:
|
102
|
+
return tuple(e.value for e in cls)
|
103
|
+
|
104
|
+
|
102
105
|
class UndefinedType:
|
103
106
|
def __init__(self) -> None:
|
104
107
|
self.undefined = True
|
@@ -139,7 +142,7 @@ def hash_dict(data) -> int:
|
|
139
142
|
|
140
143
|
class Params(BaseModel):
|
141
144
|
def keys(self):
|
142
|
-
return self.model_fields.keys()
|
145
|
+
return type(self).model_fields.keys()
|
143
146
|
|
144
147
|
def __call__(self, *args, **kwargs):
|
145
148
|
raise NotImplementedError(
|
@@ -467,24 +470,6 @@ def to_list(
|
|
467
470
|
return processed
|
468
471
|
|
469
472
|
|
470
|
-
class ToListParams(Params):
|
471
|
-
flatten: bool = False
|
472
|
-
dropna: bool = False
|
473
|
-
unique: bool = False
|
474
|
-
use_values: bool = False
|
475
|
-
flatten_tuple_set: bool = False
|
476
|
-
|
477
|
-
def __call__(self, input_: Any):
|
478
|
-
return to_list(
|
479
|
-
input_,
|
480
|
-
flatten=self.flatten,
|
481
|
-
dropna=self.dropna,
|
482
|
-
unique=self.unique,
|
483
|
-
use_values=self.use_values,
|
484
|
-
flatten_tuple_set=self.flatten_tuple_set,
|
485
|
-
)
|
486
|
-
|
487
|
-
|
488
473
|
def lcall(
|
489
474
|
input_: Iterable[T] | T,
|
490
475
|
func: Callable[[T], R] | Iterable[Callable[[T], R]],
|
@@ -592,29 +577,6 @@ def lcall(
|
|
592
577
|
return out
|
593
578
|
|
594
579
|
|
595
|
-
class CallParams(Params):
|
596
|
-
"""params class for high order function with additional handling of lower order function parameters, can take arbitrary number of args and kwargs, args need to be in agrs=, kwargs can be passed as is"""
|
597
|
-
|
598
|
-
args: list = []
|
599
|
-
kwargs: dict = {}
|
600
|
-
|
601
|
-
@model_validator(mode="before")
|
602
|
-
def _validate_data(cls, data: dict):
|
603
|
-
_d = {}
|
604
|
-
for k in list(data.keys()):
|
605
|
-
if k in cls.keys():
|
606
|
-
_d[k] = data.pop(k)
|
607
|
-
_d.setdefault("args", [])
|
608
|
-
_d.setdefault("kwargs", {})
|
609
|
-
_d["kwargs"].update(data)
|
610
|
-
return _d
|
611
|
-
|
612
|
-
def __call__(self, *args, **kwargs):
|
613
|
-
raise NotImplementedError(
|
614
|
-
"This method should be implemented in a subclass"
|
615
|
-
)
|
616
|
-
|
617
|
-
|
618
580
|
async def alcall(
|
619
581
|
input_: list[Any],
|
620
582
|
func: Callable[..., T],
|
@@ -927,92 +889,6 @@ def create_path(
|
|
927
889
|
return full_path
|
928
890
|
|
929
891
|
|
930
|
-
class CreatePathParams(Params):
|
931
|
-
directory: Path | str
|
932
|
-
filename: str
|
933
|
-
extension: str = None
|
934
|
-
timestamp: bool = False
|
935
|
-
dir_exist_ok: bool = True
|
936
|
-
file_exist_ok: bool = False
|
937
|
-
time_prefix: bool = False
|
938
|
-
timestamp_format: str | None = None
|
939
|
-
random_hash_digits: int = 0
|
940
|
-
|
941
|
-
def __call__(
|
942
|
-
self, directory: Path | str = None, filename: str = None
|
943
|
-
) -> Path:
|
944
|
-
return create_path(
|
945
|
-
directory or self.directory,
|
946
|
-
filename or self.filename,
|
947
|
-
extension=self.extension,
|
948
|
-
timestamp=self.timestamp,
|
949
|
-
dir_exist_ok=self.dir_exist_ok,
|
950
|
-
file_exist_ok=self.file_exist_ok,
|
951
|
-
time_prefix=self.time_prefix,
|
952
|
-
timestamp_format=self.timestamp_format,
|
953
|
-
random_hash_digits=self.random_hash_digits,
|
954
|
-
)
|
955
|
-
|
956
|
-
|
957
|
-
# --- JSON and XML Conversion ---
|
958
|
-
|
959
|
-
|
960
|
-
def to_xml(
|
961
|
-
obj: dict | list | str | int | float | bool | None,
|
962
|
-
root_name: str = "root",
|
963
|
-
) -> str:
|
964
|
-
"""
|
965
|
-
Convert a dictionary into an XML formatted string.
|
966
|
-
|
967
|
-
Rules:
|
968
|
-
- A dictionary key becomes an XML tag.
|
969
|
-
- If the dictionary value is:
|
970
|
-
- A primitive type (str, int, float, bool, None): it becomes the text content of the tag.
|
971
|
-
- A list: each element of the list will repeat the same tag.
|
972
|
-
- Another dictionary: it is recursively converted to nested XML.
|
973
|
-
- root_name sets the top-level XML element name.
|
974
|
-
|
975
|
-
Args:
|
976
|
-
obj: The Python object to convert (typically a dictionary).
|
977
|
-
root_name: The name of the root XML element.
|
978
|
-
|
979
|
-
Returns:
|
980
|
-
A string representing the XML.
|
981
|
-
|
982
|
-
Examples:
|
983
|
-
>>> to_xml({"a": 1, "b": {"c": "hello", "d": [10, 20]}}, root_name="data")
|
984
|
-
'<data><a>1</a><b><c>hello</c><d>10</d><d>20</d></b></data>'
|
985
|
-
"""
|
986
|
-
|
987
|
-
def _convert(value: Any, tag_name: str) -> str:
|
988
|
-
# If value is a dict, recursively convert its keys
|
989
|
-
if isinstance(value, dict):
|
990
|
-
inner = "".join(_convert(v, k) for k, v in value.items())
|
991
|
-
return f"<{tag_name}>{inner}</{tag_name}>"
|
992
|
-
# If value is a list, repeat the same tag for each element
|
993
|
-
elif isinstance(value, list):
|
994
|
-
return "".join(_convert(item, tag_name) for item in value)
|
995
|
-
# If value is a primitive, convert to string and place inside tag
|
996
|
-
else:
|
997
|
-
text = "" if value is None else str(value)
|
998
|
-
# Escape special XML characters if needed (minimal)
|
999
|
-
text = (
|
1000
|
-
text.replace("&", "&amp;")
|
1001
|
-
.replace("<", "&lt;")
|
1002
|
-
.replace(">", "&gt;")
|
1003
|
-
.replace('"', "&quot;")
|
1004
|
-
.replace("'", "&apos;")
|
1005
|
-
)
|
1006
|
-
return f"<{tag_name}>{text}</{tag_name}>"
|
1007
|
-
|
1008
|
-
# If top-level obj is not a dict, wrap it in one
|
1009
|
-
if not isinstance(obj, dict):
|
1010
|
-
obj = {root_name: obj}
|
1011
|
-
|
1012
|
-
inner_xml = "".join(_convert(v, k) for k, v in obj.items())
|
1013
|
-
return f"<{root_name}>{inner_xml}</{root_name}>"
|
1014
|
-
|
1015
|
-
|
1016
892
|
def fuzzy_parse_json(
|
1017
893
|
str_to_parse: str, /
|
1018
894
|
) -> dict[str, Any] | list[dict[str, Any]]:
|
@@ -1126,148 +1002,6 @@ def fix_json_string(str_to_parse: str, /) -> str:
|
|
1126
1002
|
return str_to_parse
|
1127
1003
|
|
1128
1004
|
|
1129
|
-
class XMLParser:
|
1130
|
-
def __init__(self, xml_string: str):
|
1131
|
-
self.xml_string = xml_string.strip()
|
1132
|
-
self.index = 0
|
1133
|
-
|
1134
|
-
def parse(self) -> dict[str, Any]:
|
1135
|
-
"""Parse the XML string and return the root element as a dictionary."""
|
1136
|
-
return self._parse_element()
|
1137
|
-
|
1138
|
-
def _parse_element(self) -> dict[str, Any]:
|
1139
|
-
"""Parse a single XML element and its children."""
|
1140
|
-
self._skip_whitespace()
|
1141
|
-
if self.xml_string[self.index] != "<":
|
1142
|
-
raise ValueError(
|
1143
|
-
f"Expected '<', found '{self.xml_string[self.index]}'"
|
1144
|
-
)
|
1145
|
-
|
1146
|
-
tag, attributes = self._parse_opening_tag()
|
1147
|
-
children: dict[str, str | list | dict] = {}
|
1148
|
-
text = ""
|
1149
|
-
|
1150
|
-
while self.index < len(self.xml_string):
|
1151
|
-
self._skip_whitespace()
|
1152
|
-
if self.xml_string.startswith("</", self.index):
|
1153
|
-
closing_tag = self._parse_closing_tag()
|
1154
|
-
if closing_tag != tag:
|
1155
|
-
raise ValueError(
|
1156
|
-
f"Mismatched tags: '{tag}' and '{closing_tag}'"
|
1157
|
-
)
|
1158
|
-
break
|
1159
|
-
elif self.xml_string.startswith("<", self.index):
|
1160
|
-
child = self._parse_element()
|
1161
|
-
child_tag, child_data = next(iter(child.items()))
|
1162
|
-
if child_tag in children:
|
1163
|
-
if not isinstance(children[child_tag], list):
|
1164
|
-
children[child_tag] = [children[child_tag]]
|
1165
|
-
children[child_tag].append(child_data)
|
1166
|
-
else:
|
1167
|
-
children[child_tag] = child_data
|
1168
|
-
else:
|
1169
|
-
text += self._parse_text()
|
1170
|
-
|
1171
|
-
result: dict[str, Any] = {}
|
1172
|
-
if attributes:
|
1173
|
-
result["@attributes"] = attributes
|
1174
|
-
if children:
|
1175
|
-
result.update(children)
|
1176
|
-
elif text.strip():
|
1177
|
-
result = text.strip()
|
1178
|
-
|
1179
|
-
return {tag: result}
|
1180
|
-
|
1181
|
-
def _parse_opening_tag(self) -> tuple[str, dict[str, str]]:
|
1182
|
-
"""Parse an opening XML tag and its attributes."""
|
1183
|
-
match = re.match(
|
1184
|
-
r'<(\w+)((?:\s+\w+="[^"]*")*)\s*/?>',
|
1185
|
-
self.xml_string[self.index :], # noqa
|
1186
|
-
)
|
1187
|
-
if not match:
|
1188
|
-
raise ValueError("Invalid opening tag")
|
1189
|
-
self.index += match.end()
|
1190
|
-
tag = match.group(1)
|
1191
|
-
attributes = dict(re.findall(r'(\w+)="([^"]*)"', match.group(2)))
|
1192
|
-
return tag, attributes
|
1193
|
-
|
1194
|
-
def _parse_closing_tag(self) -> str:
|
1195
|
-
"""Parse a closing XML tag."""
|
1196
|
-
match = re.match(r"</(\w+)>", self.xml_string[self.index :]) # noqa
|
1197
|
-
if not match:
|
1198
|
-
raise ValueError("Invalid closing tag")
|
1199
|
-
self.index += match.end()
|
1200
|
-
return match.group(1)
|
1201
|
-
|
1202
|
-
def _parse_text(self) -> str:
|
1203
|
-
"""Parse text content between XML tags."""
|
1204
|
-
start = self.index
|
1205
|
-
while (
|
1206
|
-
self.index < len(self.xml_string)
|
1207
|
-
and self.xml_string[self.index] != "<"
|
1208
|
-
):
|
1209
|
-
self.index += 1
|
1210
|
-
return self.xml_string[start : self.index] # noqa
|
1211
|
-
|
1212
|
-
def _skip_whitespace(self) -> None:
|
1213
|
-
"""Skip any whitespace characters at the current parsing position."""
|
1214
|
-
p_ = len(self.xml_string[self.index :]) # noqa
|
1215
|
-
m_ = len(self.xml_string[self.index :].lstrip()) # noqa
|
1216
|
-
|
1217
|
-
self.index += p_ - m_
|
1218
|
-
|
1219
|
-
|
1220
|
-
def xml_to_dict(
|
1221
|
-
xml_string: str,
|
1222
|
-
/,
|
1223
|
-
suppress=False,
|
1224
|
-
remove_root: bool = True,
|
1225
|
-
root_tag: str = None,
|
1226
|
-
) -> dict[str, Any]:
|
1227
|
-
"""
|
1228
|
-
Parse an XML string into a nested dictionary structure.
|
1229
|
-
|
1230
|
-
This function converts an XML string into a dictionary where:
|
1231
|
-
- Element tags become dictionary keys
|
1232
|
-
- Text content is assigned directly to the tag key if there are no children
|
1233
|
-
- Attributes are stored in a '@attributes' key
|
1234
|
-
- Multiple child elements with the same tag are stored as lists
|
1235
|
-
|
1236
|
-
Args:
|
1237
|
-
xml_string: The XML string to parse.
|
1238
|
-
|
1239
|
-
Returns:
|
1240
|
-
A dictionary representation of the XML structure.
|
1241
|
-
|
1242
|
-
Raises:
|
1243
|
-
ValueError: If the XML is malformed or parsing fails.
|
1244
|
-
"""
|
1245
|
-
try:
|
1246
|
-
a = XMLParser(xml_string).parse()
|
1247
|
-
if remove_root and (root_tag or "root") in a:
|
1248
|
-
a = a[root_tag or "root"]
|
1249
|
-
return a
|
1250
|
-
except ValueError as e:
|
1251
|
-
if not suppress:
|
1252
|
-
raise e
|
1253
|
-
|
1254
|
-
|
1255
|
-
def dict_to_xml(data: dict, /, root_tag: str = "root") -> str:
|
1256
|
-
root = ET.Element(root_tag)
|
1257
|
-
|
1258
|
-
def convert(dict_obj: dict, parent: Any) -> None:
|
1259
|
-
for key, val in dict_obj.items():
|
1260
|
-
if isinstance(val, dict):
|
1261
|
-
element = ET.SubElement(parent, key)
|
1262
|
-
convert(dict_obj=val, parent=element)
|
1263
|
-
else:
|
1264
|
-
element = ET.SubElement(parent, key)
|
1265
|
-
element.text = str(object=val)
|
1266
|
-
|
1267
|
-
convert(dict_obj=data, parent=root)
|
1268
|
-
return ET.tostring(root, encoding="unicode")
|
1269
|
-
|
1270
|
-
|
1271
1005
|
def to_dict(
|
1272
1006
|
input_: Any,
|
1273
1007
|
/,
|
@@ -1671,74 +1405,6 @@ def get_bins(input_: list[str], upper: int) -> list[list[int]]:
|
|
1671
1405
|
return bins
|
1672
1406
|
|
1673
1407
|
|
1674
|
-
class Throttle:
|
1675
|
-
"""
|
1676
|
-
Provide a throttling mechanism for function calls.
|
1677
|
-
|
1678
|
-
When used as a decorator, it ensures that the decorated function can only
|
1679
|
-
be called once per specified period. Subsequent calls within this period
|
1680
|
-
are delayed to enforce this constraint.
|
1681
|
-
|
1682
|
-
Attributes:
|
1683
|
-
period: The minimum time period (in seconds) between successive calls.
|
1684
|
-
"""
|
1685
|
-
|
1686
|
-
def __init__(self, period: float) -> None:
|
1687
|
-
"""
|
1688
|
-
Initialize a new instance of Throttle.
|
1689
|
-
|
1690
|
-
Args:
|
1691
|
-
period: The minimum time period (in seconds) between
|
1692
|
-
successive calls.
|
1693
|
-
"""
|
1694
|
-
self.period = period
|
1695
|
-
self.last_called = 0
|
1696
|
-
|
1697
|
-
def __call__(self, func: Callable[..., T]) -> Callable[..., T]:
|
1698
|
-
"""
|
1699
|
-
Decorate a synchronous function with the throttling mechanism.
|
1700
|
-
|
1701
|
-
Args:
|
1702
|
-
func: The synchronous function to be throttled.
|
1703
|
-
|
1704
|
-
Returns:
|
1705
|
-
The throttled synchronous function.
|
1706
|
-
"""
|
1707
|
-
|
1708
|
-
@functools.wraps(func)
|
1709
|
-
def wrapper(*args, **kwargs) -> Any:
|
1710
|
-
elapsed = time() - self.last_called
|
1711
|
-
if elapsed < self.period:
|
1712
|
-
t_.sleep(self.period - elapsed)
|
1713
|
-
self.last_called = time()
|
1714
|
-
return func(*args, **kwargs)
|
1715
|
-
|
1716
|
-
return wrapper
|
1717
|
-
|
1718
|
-
def __call_async__(
|
1719
|
-
self, func: Callable[..., Callable[..., T]]
|
1720
|
-
) -> Callable[..., Callable[..., T]]:
|
1721
|
-
"""
|
1722
|
-
Decorate an asynchronous function with the throttling mechanism.
|
1723
|
-
|
1724
|
-
Args:
|
1725
|
-
func: The asynchronous function to be throttled.
|
1726
|
-
|
1727
|
-
Returns:
|
1728
|
-
The throttled asynchronous function.
|
1729
|
-
"""
|
1730
|
-
|
1731
|
-
@functools.wraps(func)
|
1732
|
-
async def wrapper(*args, **kwargs) -> Any:
|
1733
|
-
elapsed = time() - self.last_called
|
1734
|
-
if elapsed < self.period:
|
1735
|
-
await asyncio.sleep(self.period - elapsed)
|
1736
|
-
self.last_called = time()
|
1737
|
-
return await func(*args, **kwargs)
|
1738
|
-
|
1739
|
-
return wrapper
|
1740
|
-
|
1741
|
-
|
1742
1408
|
def force_async(fn: Callable[..., T]) -> Callable[..., Callable[..., T]]:
|
1743
1409
|
"""
|
1744
1410
|
Convert a synchronous function to an asynchronous function
|
@@ -1773,6 +1439,8 @@ def throttle(
|
|
1773
1439
|
Returns:
|
1774
1440
|
The throttled function.
|
1775
1441
|
"""
|
1442
|
+
from lionagi.libs.concurrency.throttle import Throttle
|
1443
|
+
|
1776
1444
|
if not is_coro_func(func):
|
1777
1445
|
func = force_async(func)
|
1778
1446
|
throttle_instance = Throttle(period)
|
@@ -1810,380 +1478,6 @@ def max_concurrent(
|
|
1810
1478
|
return wrapper
|
1811
1479
|
|
1812
1480
|
|
1813
|
-
# Type definitions
|
1814
|
-
NUM_TYPE_LITERAL = Literal["int", "float", "complex"]
|
1815
|
-
NUM_TYPES = type[int] | type[float] | type[complex] | NUM_TYPE_LITERAL
|
1816
|
-
NumericType = TypeVar("NumericType", int, float, complex)
|
1817
|
-
|
1818
|
-
# Type mapping
|
1819
|
-
TYPE_MAP = {"int": int, "float": float, "complex": complex}
|
1820
|
-
|
1821
|
-
# Regex patterns for different numeric formats
|
1822
|
-
PATTERNS = {
|
1823
|
-
"scientific": r"[-+]?(?:\d*\.)?\d+[eE][-+]?\d+",
|
1824
|
-
"complex_sci": r"[-+]?(?:\d*\.)?\d+(?:[eE][-+]?\d+)?[-+](?:\d*\.)?\d+(?:[eE][-+]?\d+)?[jJ]",
|
1825
|
-
"complex": r"[-+]?(?:\d*\.)?\d+[-+](?:\d*\.)?\d+[jJ]",
|
1826
|
-
"pure_imaginary": r"[-+]?(?:\d*\.)?\d*[jJ]",
|
1827
|
-
"percentage": r"[-+]?(?:\d*\.)?\d+%",
|
1828
|
-
"fraction": r"[-+]?\d+/\d+",
|
1829
|
-
"decimal": r"[-+]?(?:\d*\.)?\d+",
|
1830
|
-
"special": r"[-+]?(?:inf|infinity|nan)",
|
1831
|
-
}
|
1832
|
-
|
1833
|
-
|
1834
|
-
def to_num(
|
1835
|
-
input_: Any,
|
1836
|
-
/,
|
1837
|
-
*,
|
1838
|
-
upper_bound: int | float | None = None,
|
1839
|
-
lower_bound: int | float | None = None,
|
1840
|
-
num_type: NUM_TYPES = float,
|
1841
|
-
precision: int | None = None,
|
1842
|
-
num_count: int = 1,
|
1843
|
-
) -> int | float | complex | list[int | float | complex]:
|
1844
|
-
"""Convert input to numeric type(s) with validation and bounds checking.
|
1845
|
-
|
1846
|
-
Args:
|
1847
|
-
input_value: The input to convert to number(s).
|
1848
|
-
upper_bound: Maximum allowed value (inclusive).
|
1849
|
-
lower_bound: Minimum allowed value (inclusive).
|
1850
|
-
num_type: Target numeric type ('int', 'float', 'complex' or type objects).
|
1851
|
-
precision: Number of decimal places for rounding (float only).
|
1852
|
-
num_count: Number of numeric values to extract.
|
1853
|
-
|
1854
|
-
Returns:
|
1855
|
-
Converted number(s). Single value if num_count=1, else list.
|
1856
|
-
|
1857
|
-
Raises:
|
1858
|
-
ValueError: For invalid input or out of bounds values.
|
1859
|
-
TypeError: For invalid input types or invalid type conversions.
|
1860
|
-
"""
|
1861
|
-
# Validate input
|
1862
|
-
if isinstance(input_, (list, tuple)):
|
1863
|
-
raise TypeError("Input cannot be a sequence")
|
1864
|
-
|
1865
|
-
# Handle boolean input
|
1866
|
-
if isinstance(input_, bool):
|
1867
|
-
return validate_num_type(num_type)(input_)
|
1868
|
-
|
1869
|
-
# Handle direct numeric input
|
1870
|
-
if isinstance(input_, (int, float, complex, Decimal)):
|
1871
|
-
inferred_type = type(input_)
|
1872
|
-
if isinstance(input_, Decimal):
|
1873
|
-
inferred_type = float
|
1874
|
-
value = float(input_) if not isinstance(input_, complex) else input_
|
1875
|
-
value = apply_bounds(value, upper_bound, lower_bound)
|
1876
|
-
value = apply_precision(value, precision)
|
1877
|
-
return convert_type(value, validate_num_type(num_type), inferred_type)
|
1878
|
-
|
1879
|
-
# Convert input to string and extract numbers
|
1880
|
-
input_str = str(input_)
|
1881
|
-
number_matches = extract_numbers(input_str)
|
1882
|
-
|
1883
|
-
if not number_matches:
|
1884
|
-
raise ValueError(f"No valid numbers found in: {input_str}")
|
1885
|
-
|
1886
|
-
# Process numbers
|
1887
|
-
results = []
|
1888
|
-
target_type = validate_num_type(num_type)
|
1889
|
-
|
1890
|
-
number_matches = (
|
1891
|
-
number_matches[:num_count]
|
1892
|
-
if num_count < len(number_matches)
|
1893
|
-
else number_matches
|
1894
|
-
)
|
1895
|
-
|
1896
|
-
for type_and_value in number_matches:
|
1897
|
-
try:
|
1898
|
-
# Infer appropriate type
|
1899
|
-
inferred_type = infer_type(type_and_value)
|
1900
|
-
|
1901
|
-
# Parse to numeric value
|
1902
|
-
value = parse_number(type_and_value)
|
1903
|
-
|
1904
|
-
# Apply bounds if not complex
|
1905
|
-
value = apply_bounds(value, upper_bound, lower_bound)
|
1906
|
-
|
1907
|
-
# Apply precision
|
1908
|
-
value = apply_precision(value, precision)
|
1909
|
-
|
1910
|
-
# Convert to target type if different from inferred
|
1911
|
-
value = convert_type(value, target_type, inferred_type)
|
1912
|
-
|
1913
|
-
results.append(value)
|
1914
|
-
|
1915
|
-
except Exception as e:
|
1916
|
-
if len(type_and_value) == 2:
|
1917
|
-
raise type(e)(
|
1918
|
-
f"Error processing {type_and_value[1]}: {str(e)}"
|
1919
|
-
)
|
1920
|
-
raise type(e)(f"Error processing {type_and_value}: {str(e)}")
|
1921
|
-
|
1922
|
-
if results and num_count == 1:
|
1923
|
-
return results[0]
|
1924
|
-
return results
|
1925
|
-
|
1926
|
-
|
1927
|
-
def extract_numbers(text: str) -> list[tuple[str, str]]:
|
1928
|
-
"""Extract numeric values from text using ordered regex patterns.
|
1929
|
-
|
1930
|
-
Args:
|
1931
|
-
text: The text to extract numbers from.
|
1932
|
-
|
1933
|
-
Returns:
|
1934
|
-
List of tuples containing (pattern_type, matched_value).
|
1935
|
-
"""
|
1936
|
-
combined_pattern = "|".join(PATTERNS.values())
|
1937
|
-
matches = re.finditer(combined_pattern, text, re.IGNORECASE)
|
1938
|
-
numbers = []
|
1939
|
-
|
1940
|
-
for match in matches:
|
1941
|
-
value = match.group()
|
1942
|
-
# Check which pattern matched
|
1943
|
-
for pattern_name, pattern in PATTERNS.items():
|
1944
|
-
if re.fullmatch(pattern, value, re.IGNORECASE):
|
1945
|
-
numbers.append((pattern_name, value))
|
1946
|
-
break
|
1947
|
-
|
1948
|
-
return numbers
|
1949
|
-
|
1950
|
-
|
1951
|
-
def validate_num_type(num_type: NUM_TYPES) -> type:
|
1952
|
-
"""Validate and normalize numeric type specification.
|
1953
|
-
|
1954
|
-
Args:
|
1955
|
-
num_type: The numeric type to validate.
|
1956
|
-
|
1957
|
-
Returns:
|
1958
|
-
The normalized Python type object.
|
1959
|
-
|
1960
|
-
Raises:
|
1961
|
-
ValueError: If the type specification is invalid.
|
1962
|
-
"""
|
1963
|
-
if isinstance(num_type, str):
|
1964
|
-
if num_type not in TYPE_MAP:
|
1965
|
-
raise ValueError(f"Invalid number type: {num_type}")
|
1966
|
-
return TYPE_MAP[num_type]
|
1967
|
-
|
1968
|
-
if num_type not in (int, float, complex):
|
1969
|
-
raise ValueError(f"Invalid number type: {num_type}")
|
1970
|
-
return num_type
|
1971
|
-
|
1972
|
-
|
1973
|
-
def infer_type(value: tuple[str, str]) -> type:
|
1974
|
-
"""Infer appropriate numeric type from value.
|
1975
|
-
|
1976
|
-
Args:
|
1977
|
-
value: Tuple of (pattern_type, matched_value).
|
1978
|
-
|
1979
|
-
Returns:
|
1980
|
-
The inferred Python type.
|
1981
|
-
"""
|
1982
|
-
pattern_type, _ = value
|
1983
|
-
if pattern_type in ("complex", "complex_sci", "pure_imaginary"):
|
1984
|
-
return complex
|
1985
|
-
return float
|
1986
|
-
|
1987
|
-
|
1988
|
-
def convert_special(value: str) -> float:
|
1989
|
-
"""Convert special float values (inf, -inf, nan).
|
1990
|
-
|
1991
|
-
Args:
|
1992
|
-
value: The string value to convert.
|
1993
|
-
|
1994
|
-
Returns:
|
1995
|
-
The converted float value.
|
1996
|
-
"""
|
1997
|
-
value = value.lower()
|
1998
|
-
if "infinity" in value or "inf" in value:
|
1999
|
-
return float("-inf") if value.startswith("-") else float("inf")
|
2000
|
-
return float("nan")
|
2001
|
-
|
2002
|
-
|
2003
|
-
def convert_percentage(value: str) -> float:
|
2004
|
-
"""Convert percentage string to float.
|
2005
|
-
|
2006
|
-
Args:
|
2007
|
-
value: The percentage string to convert.
|
2008
|
-
|
2009
|
-
Returns:
|
2010
|
-
The converted float value.
|
2011
|
-
|
2012
|
-
Raises:
|
2013
|
-
ValueError: If the percentage value is invalid.
|
2014
|
-
"""
|
2015
|
-
try:
|
2016
|
-
return float(value.rstrip("%")) / 100
|
2017
|
-
except ValueError as e:
|
2018
|
-
raise ValueError(f"Invalid percentage value: {value}") from e
|
2019
|
-
|
2020
|
-
|
2021
|
-
def convert_complex(value: str) -> complex:
|
2022
|
-
"""Convert complex number string to complex.
|
2023
|
-
|
2024
|
-
Args:
|
2025
|
-
value: The complex number string to convert.
|
2026
|
-
|
2027
|
-
Returns:
|
2028
|
-
The converted complex value.
|
2029
|
-
|
2030
|
-
Raises:
|
2031
|
-
ValueError: If the complex number is invalid.
|
2032
|
-
"""
|
2033
|
-
try:
|
2034
|
-
# Handle pure imaginary numbers
|
2035
|
-
if value.endswith("j") or value.endswith("J"):
|
2036
|
-
if value in ("j", "J"):
|
2037
|
-
return complex(0, 1)
|
2038
|
-
if value in ("+j", "+J"):
|
2039
|
-
return complex(0, 1)
|
2040
|
-
if value in ("-j", "-J"):
|
2041
|
-
return complex(0, -1)
|
2042
|
-
if "+" not in value and "-" not in value[1:]:
|
2043
|
-
# Pure imaginary number
|
2044
|
-
imag = float(value[:-1] or "1")
|
2045
|
-
return complex(0, imag)
|
2046
|
-
|
2047
|
-
return complex(value.replace(" ", ""))
|
2048
|
-
except ValueError as e:
|
2049
|
-
raise ValueError(f"Invalid complex number: {value}") from e
|
2050
|
-
|
2051
|
-
|
2052
|
-
def convert_type(
|
2053
|
-
value: float | complex,
|
2054
|
-
target_type: type,
|
2055
|
-
inferred_type: type,
|
2056
|
-
) -> int | float | complex:
|
2057
|
-
"""Convert value to target type if specified, otherwise use inferred type.
|
2058
|
-
|
2059
|
-
Args:
|
2060
|
-
value: The value to convert.
|
2061
|
-
target_type: The requested target type.
|
2062
|
-
inferred_type: The inferred type from the value.
|
2063
|
-
|
2064
|
-
Returns:
|
2065
|
-
The converted value.
|
2066
|
-
|
2067
|
-
Raises:
|
2068
|
-
TypeError: If the conversion is not possible.
|
2069
|
-
"""
|
2070
|
-
try:
|
2071
|
-
# If no specific type requested, use inferred type
|
2072
|
-
if target_type is float and inferred_type is complex:
|
2073
|
-
return value
|
2074
|
-
|
2075
|
-
# Handle explicit type conversions
|
2076
|
-
if target_type is int and isinstance(value, complex):
|
2077
|
-
raise TypeError("Cannot convert complex number to int")
|
2078
|
-
return target_type(value)
|
2079
|
-
except (ValueError, TypeError) as e:
|
2080
|
-
raise TypeError(
|
2081
|
-
f"Cannot convert {value} to {target_type.__name__}"
|
2082
|
-
) from e
|
2083
|
-
|
2084
|
-
|
2085
|
-
def apply_bounds(
|
2086
|
-
value: float | complex,
|
2087
|
-
upper_bound: float | None = None,
|
2088
|
-
lower_bound: float | None = None,
|
2089
|
-
) -> float | complex:
|
2090
|
-
"""Apply bounds checking to numeric value.
|
2091
|
-
|
2092
|
-
Args:
|
2093
|
-
value: The value to check.
|
2094
|
-
upper_bound: Maximum allowed value (inclusive).
|
2095
|
-
lower_bound: Minimum allowed value (inclusive).
|
2096
|
-
|
2097
|
-
Returns:
|
2098
|
-
The validated value.
|
2099
|
-
|
2100
|
-
Raises:
|
2101
|
-
ValueError: If the value is outside bounds.
|
2102
|
-
"""
|
2103
|
-
if isinstance(value, complex):
|
2104
|
-
return value
|
2105
|
-
|
2106
|
-
if upper_bound is not None and value > upper_bound:
|
2107
|
-
raise ValueError(f"Value {value} exceeds upper bound {upper_bound}")
|
2108
|
-
if lower_bound is not None and value < lower_bound:
|
2109
|
-
raise ValueError(f"Value {value} below lower bound {lower_bound}")
|
2110
|
-
return value
|
2111
|
-
|
2112
|
-
|
2113
|
-
def apply_precision(
|
2114
|
-
value: float | complex,
|
2115
|
-
precision: int | None,
|
2116
|
-
) -> float | complex:
|
2117
|
-
"""Apply precision rounding to numeric value.
|
2118
|
-
|
2119
|
-
Args:
|
2120
|
-
value: The value to round.
|
2121
|
-
precision: Number of decimal places.
|
2122
|
-
|
2123
|
-
Returns:
|
2124
|
-
The rounded value.
|
2125
|
-
"""
|
2126
|
-
if precision is None or isinstance(value, complex):
|
2127
|
-
return value
|
2128
|
-
if isinstance(value, float):
|
2129
|
-
return round(value, precision)
|
2130
|
-
return value
|
2131
|
-
|
2132
|
-
|
2133
|
-
def parse_number(type_and_value: tuple[str, str]) -> float | complex:
|
2134
|
-
"""Parse string to numeric value based on pattern type.
|
2135
|
-
|
2136
|
-
Args:
|
2137
|
-
type_and_value: Tuple of (pattern_type, matched_value).
|
2138
|
-
|
2139
|
-
Returns:
|
2140
|
-
The parsed numeric value.
|
2141
|
-
|
2142
|
-
Raises:
|
2143
|
-
ValueError: If parsing fails.
|
2144
|
-
"""
|
2145
|
-
num_type, value = type_and_value
|
2146
|
-
value = value.strip()
|
2147
|
-
|
2148
|
-
try:
|
2149
|
-
if num_type == "special":
|
2150
|
-
return convert_special(value)
|
2151
|
-
|
2152
|
-
if num_type == "percentage":
|
2153
|
-
return convert_percentage(value)
|
2154
|
-
|
2155
|
-
if num_type == "fraction":
|
2156
|
-
if "/" not in value:
|
2157
|
-
raise ValueError(f"Invalid fraction: {value}")
|
2158
|
-
if value.count("/") > 1:
|
2159
|
-
raise ValueError(f"Invalid fraction: {value}")
|
2160
|
-
num, denom = value.split("/")
|
2161
|
-
if not (num.strip("-").isdigit() and denom.isdigit()):
|
2162
|
-
raise ValueError(f"Invalid fraction: {value}")
|
2163
|
-
denom_val = float(denom)
|
2164
|
-
if denom_val == 0:
|
2165
|
-
raise ValueError("Division by zero")
|
2166
|
-
return float(num) / denom_val
|
2167
|
-
if num_type in ("complex", "complex_sci", "pure_imaginary"):
|
2168
|
-
return convert_complex(value)
|
2169
|
-
if num_type == "scientific":
|
2170
|
-
if "e" not in value.lower():
|
2171
|
-
raise ValueError(f"Invalid scientific notation: {value}")
|
2172
|
-
parts = value.lower().split("e")
|
2173
|
-
if len(parts) != 2:
|
2174
|
-
raise ValueError(f"Invalid scientific notation: {value}")
|
2175
|
-
if not (parts[1].lstrip("+-").isdigit()):
|
2176
|
-
raise ValueError(f"Invalid scientific notation: {value}")
|
2177
|
-
return float(value)
|
2178
|
-
if num_type == "decimal":
|
2179
|
-
return float(value)
|
2180
|
-
|
2181
|
-
raise ValueError(f"Unknown number type: {num_type}")
|
2182
|
-
except Exception as e:
|
2183
|
-
# Preserve the specific error type but wrap with more context
|
2184
|
-
raise type(e)(f"Failed to parse {value} as {num_type}: {str(e)}")
|
2185
|
-
|
2186
|
-
|
2187
1481
|
def breakdown_pydantic_annotation(
|
2188
1482
|
model: type[B], max_depth: int | None = None, current_depth: int = 0
|
2189
1483
|
) -> dict[str, Any]:
|
@@ -2403,60 +1697,3 @@ def is_import_installed(package_name: str) -> bool:
|
|
2403
1697
|
bool: True if the package is installed, False otherwise.
|
2404
1698
|
"""
|
2405
1699
|
return importlib.util.find_spec(package_name) is not None
|
2406
|
-
|
2407
|
-
|
2408
|
-
def read_image_to_base64(image_path: str | Path) -> str:
|
2409
|
-
import base64
|
2410
|
-
|
2411
|
-
import cv2
|
2412
|
-
|
2413
|
-
image_path = str(image_path)
|
2414
|
-
image = cv2.imread(image_path, cv2.COLOR_BGR2RGB)
|
2415
|
-
|
2416
|
-
if image is None:
|
2417
|
-
raise ValueError(f"Could not read image from path: {image_path}")
|
2418
|
-
|
2419
|
-
file_extension = "." + image_path.split(".")[-1]
|
2420
|
-
|
2421
|
-
success, buffer = cv2.imencode(file_extension, image)
|
2422
|
-
if not success:
|
2423
|
-
raise ValueError(f"Could not encode image to {file_extension} format.")
|
2424
|
-
encoded_image = base64.b64encode(buffer).decode("utf-8")
|
2425
|
-
return encoded_image
|
2426
|
-
|
2427
|
-
|
2428
|
-
def pdf_to_images(
|
2429
|
-
pdf_path: str, output_folder: str, dpi: int = 300, fmt: str = "jpeg"
|
2430
|
-
) -> list:
|
2431
|
-
"""
|
2432
|
-
Convert a PDF file into images, one image per page.
|
2433
|
-
|
2434
|
-
Args:
|
2435
|
-
pdf_path (str): Path to the input PDF file.
|
2436
|
-
output_folder (str): Directory to save the output images.
|
2437
|
-
dpi (int): Dots per inch (resolution) for conversion (default: 300).
|
2438
|
-
fmt (str): Image format (default: 'jpeg'). Use 'png' if preferred.
|
2439
|
-
|
2440
|
-
Returns:
|
2441
|
-
list: A list of file paths for the saved images.
|
2442
|
-
"""
|
2443
|
-
import os
|
2444
|
-
|
2445
|
-
convert_from_path = check_import(
|
2446
|
-
"pdf2image", import_name="convert_from_path"
|
2447
|
-
)
|
2448
|
-
|
2449
|
-
# Ensure the output folder exists
|
2450
|
-
os.makedirs(output_folder, exist_ok=True)
|
2451
|
-
|
2452
|
-
# Convert PDF to a list of PIL Image objects
|
2453
|
-
images = convert_from_path(pdf_path, dpi=dpi)
|
2454
|
-
|
2455
|
-
saved_paths = []
|
2456
|
-
for i, image in enumerate(images):
|
2457
|
-
# Construct the output file name
|
2458
|
-
image_file = os.path.join(output_folder, f"page_{i + 1}.{fmt}")
|
2459
|
-
image.save(image_file, fmt.upper())
|
2460
|
-
saved_paths.append(image_file)
|
2461
|
-
|
2462
|
-
return saved_paths
|