lionagi 0.12.2__py3-none-any.whl → 0.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/fields/file.py +1 -1
- lionagi/fields/reason.py +1 -1
- lionagi/libs/file/concat.py +1 -6
- lionagi/libs/file/concat_files.py +1 -5
- lionagi/libs/file/save.py +1 -1
- lionagi/libs/package/imports.py +8 -177
- lionagi/libs/parse.py +30 -0
- lionagi/libs/token_transform/synthlang_/resources/frameworks/framework_options.json +46 -46
- lionagi/protocols/generic/log.py +1 -2
- lionagi/protocols/messages/templates/README.md +6 -10
- lionagi/service/providers/exa_/models.py +159 -2
- lionagi/service/providers/perplexity_/models.py +143 -4
- lionagi/utils.py +921 -123
- lionagi/version.py +1 -1
- {lionagi-0.12.2.dist-info → lionagi-0.12.3.dist-info}/METADATA +27 -9
- {lionagi-0.12.2.dist-info → lionagi-0.12.3.dist-info}/RECORD +18 -27
- lionagi/libs/file/create_path.py +0 -80
- lionagi/libs/file/file_util.py +0 -358
- lionagi/libs/parse/__init__.py +0 -3
- lionagi/libs/parse/fuzzy_parse_json.py +0 -117
- lionagi/libs/parse/to_dict.py +0 -336
- lionagi/libs/parse/to_json.py +0 -61
- lionagi/libs/parse/to_num.py +0 -378
- lionagi/libs/parse/to_xml.py +0 -57
- lionagi/libs/parse/xml_parser.py +0 -148
- lionagi/libs/schema/breakdown_pydantic_annotation.py +0 -48
- {lionagi-0.12.2.dist-info → lionagi-0.12.3.dist-info}/WHEEL +0 -0
- {lionagi-0.12.2.dist-info → lionagi-0.12.3.dist-info}/licenses/LICENSE +0 -0
lionagi/utils.py
CHANGED
@@ -3,13 +3,20 @@
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
4
4
|
|
5
5
|
import asyncio
|
6
|
+
import contextlib
|
6
7
|
import copy as _copy
|
7
8
|
import functools
|
9
|
+
import importlib.metadata
|
8
10
|
import importlib.util
|
9
11
|
import json
|
10
12
|
import logging
|
13
|
+
import re
|
14
|
+
import shutil
|
11
15
|
import subprocess
|
16
|
+
import sys
|
12
17
|
import time as t_
|
18
|
+
import uuid
|
19
|
+
import xml.etree.ElementTree as ET
|
13
20
|
from abc import ABC
|
14
21
|
from collections.abc import (
|
15
22
|
AsyncGenerator,
|
@@ -20,10 +27,20 @@ from collections.abc import (
|
|
20
27
|
)
|
21
28
|
from concurrent.futures import ThreadPoolExecutor
|
22
29
|
from datetime import datetime, timezone
|
30
|
+
from decimal import Decimal
|
23
31
|
from enum import Enum
|
24
|
-
from functools import lru_cache
|
32
|
+
from functools import lru_cache, partial
|
33
|
+
from inspect import isclass
|
25
34
|
from pathlib import Path
|
26
|
-
from typing import
|
35
|
+
from typing import (
|
36
|
+
Any,
|
37
|
+
Literal,
|
38
|
+
TypedDict,
|
39
|
+
TypeVar,
|
40
|
+
get_args,
|
41
|
+
get_origin,
|
42
|
+
overload,
|
43
|
+
)
|
27
44
|
|
28
45
|
from pydantic import BaseModel, model_validator
|
29
46
|
from pydantic_core import PydanticUndefinedType
|
@@ -119,7 +136,7 @@ def hash_dict(data) -> int:
|
|
119
136
|
class Params(BaseModel):
|
120
137
|
|
121
138
|
def keys(self):
|
122
|
-
return self.
|
139
|
+
return self.model_fields.keys()
|
123
140
|
|
124
141
|
def __call__(self, *args, **kwargs):
|
125
142
|
raise NotImplementedError(
|
@@ -988,19 +1005,45 @@ def create_path(
|
|
988
1005
|
ValueError: If filename is invalid.
|
989
1006
|
FileExistsError: If file exists and file_exist_ok=False.
|
990
1007
|
"""
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
|
1003
|
-
|
1008
|
+
if "/" in filename:
|
1009
|
+
sub_dir, filename = filename.split("/")[:-1], filename.split("/")[-1]
|
1010
|
+
directory = Path(directory) / "/".join(sub_dir)
|
1011
|
+
|
1012
|
+
if "\\" in filename:
|
1013
|
+
raise ValueError("Filename cannot contain directory separators.")
|
1014
|
+
|
1015
|
+
directory = Path(directory)
|
1016
|
+
|
1017
|
+
# Extract name and extension from filename if present
|
1018
|
+
if "." in filename:
|
1019
|
+
name, ext = filename.rsplit(".", 1)
|
1020
|
+
else:
|
1021
|
+
name, ext = filename, extension
|
1022
|
+
|
1023
|
+
# Ensure extension has a single leading dot
|
1024
|
+
ext = f".{ext.lstrip('.')}" if ext else ""
|
1025
|
+
|
1026
|
+
# Add timestamp if requested
|
1027
|
+
if timestamp:
|
1028
|
+
ts_str = datetime.now().strftime(timestamp_format or "%Y%m%d%H%M%S")
|
1029
|
+
name = f"{ts_str}_{name}" if time_prefix else f"{name}_{ts_str}"
|
1030
|
+
|
1031
|
+
# Add random suffix if requested
|
1032
|
+
if random_hash_digits > 0:
|
1033
|
+
# Use UUID4 and truncate its hex for random suffix
|
1034
|
+
random_suffix = uuid.uuid4().hex[:random_hash_digits]
|
1035
|
+
name = f"{name}-{random_suffix}"
|
1036
|
+
|
1037
|
+
full_path = directory / f"{name}{ext}"
|
1038
|
+
|
1039
|
+
# Check if file or directory existence
|
1040
|
+
full_path.parent.mkdir(parents=True, exist_ok=dir_exist_ok)
|
1041
|
+
if full_path.exists() and not file_exist_ok:
|
1042
|
+
raise FileExistsError(
|
1043
|
+
f"File {full_path} already exists and file_exist_ok is False."
|
1044
|
+
)
|
1045
|
+
|
1046
|
+
return full_path
|
1004
1047
|
|
1005
1048
|
|
1006
1049
|
class CreatePathParams(Params):
|
@@ -1059,9 +1102,34 @@ def to_xml(
|
|
1059
1102
|
>>> to_xml({"a": 1, "b": {"c": "hello", "d": [10, 20]}}, root_name="data")
|
1060
1103
|
'<data><a>1</a><b><c>hello</c><d>10</d><d>20</d></b></data>'
|
1061
1104
|
"""
|
1062
|
-
from lionagi.libs.parse.to_xml import to_xml
|
1063
1105
|
|
1064
|
-
|
1106
|
+
def _convert(value: Any, tag_name: str) -> str:
|
1107
|
+
# If value is a dict, recursively convert its keys
|
1108
|
+
if isinstance(value, dict):
|
1109
|
+
inner = "".join(_convert(v, k) for k, v in value.items())
|
1110
|
+
return f"<{tag_name}>{inner}</{tag_name}>"
|
1111
|
+
# If value is a list, repeat the same tag for each element
|
1112
|
+
elif isinstance(value, list):
|
1113
|
+
return "".join(_convert(item, tag_name) for item in value)
|
1114
|
+
# If value is a primitive, convert to string and place inside tag
|
1115
|
+
else:
|
1116
|
+
text = "" if value is None else str(value)
|
1117
|
+
# Escape special XML characters if needed (minimal)
|
1118
|
+
text = (
|
1119
|
+
text.replace("&", "&")
|
1120
|
+
.replace("<", "<")
|
1121
|
+
.replace(">", ">")
|
1122
|
+
.replace('"', """)
|
1123
|
+
.replace("'", "'")
|
1124
|
+
)
|
1125
|
+
return f"<{tag_name}>{text}</{tag_name}>"
|
1126
|
+
|
1127
|
+
# If top-level obj is not a dict, wrap it in one
|
1128
|
+
if not isinstance(obj, dict):
|
1129
|
+
obj = {root_name: obj}
|
1130
|
+
|
1131
|
+
inner_xml = "".join(_convert(v, k) for k, v in obj.items())
|
1132
|
+
return f"<{root_name}>{inner_xml}</{root_name}>"
|
1065
1133
|
|
1066
1134
|
|
1067
1135
|
def fuzzy_parse_json(
|
@@ -1086,9 +1154,186 @@ def fuzzy_parse_json(
|
|
1086
1154
|
ValueError: If the string cannot be parsed as valid JSON
|
1087
1155
|
TypeError: If the input is not a string
|
1088
1156
|
"""
|
1089
|
-
|
1157
|
+
_check_valid_str(str_to_parse)
|
1158
|
+
|
1159
|
+
# 1. Direct attempt
|
1160
|
+
with contextlib.suppress(Exception):
|
1161
|
+
return json.loads(str_to_parse)
|
1162
|
+
|
1163
|
+
# 2. Try cleaning: replace single quotes with double and normalize
|
1164
|
+
cleaned = _clean_json_string(str_to_parse.replace("'", '"'))
|
1165
|
+
with contextlib.suppress(Exception):
|
1166
|
+
return json.loads(cleaned)
|
1167
|
+
|
1168
|
+
# 3. Try fixing brackets
|
1169
|
+
fixed = fix_json_string(cleaned)
|
1170
|
+
with contextlib.suppress(Exception):
|
1171
|
+
return json.loads(fixed)
|
1172
|
+
|
1173
|
+
# If all attempts fail
|
1174
|
+
raise ValueError("Invalid JSON string")
|
1175
|
+
|
1176
|
+
|
1177
|
+
def _check_valid_str(str_to_parse: str, /):
|
1178
|
+
if not isinstance(str_to_parse, str):
|
1179
|
+
raise TypeError("Input must be a string")
|
1180
|
+
if not str_to_parse.strip():
|
1181
|
+
raise ValueError("Input string is empty")
|
1182
|
+
|
1183
|
+
|
1184
|
+
def _clean_json_string(s: str) -> str:
|
1185
|
+
"""Basic normalization: replace unescaped single quotes, trim spaces, ensure keys are quoted."""
|
1186
|
+
# Replace unescaped single quotes with double quotes
|
1187
|
+
# '(?<!\\)'" means a single quote not preceded by a backslash
|
1188
|
+
s = re.sub(r"(?<!\\)'", '"', s)
|
1189
|
+
# Collapse multiple whitespaces
|
1190
|
+
s = re.sub(r"\s+", " ", s)
|
1191
|
+
# Ensure keys are quoted
|
1192
|
+
# This attempts to find patterns like { key: value } and turn them into {"key": value}
|
1193
|
+
s = re.sub(r'([{,])\s*([^"\s]+)\s*:', r'\1"\2":', s)
|
1194
|
+
return s.strip()
|
1195
|
+
|
1196
|
+
|
1197
|
+
def fix_json_string(str_to_parse: str, /) -> str:
|
1198
|
+
"""Try to fix JSON string by ensuring brackets are matched properly."""
|
1199
|
+
if not str_to_parse:
|
1200
|
+
raise ValueError("Input string is empty")
|
1201
|
+
|
1202
|
+
brackets = {"{": "}", "[": "]"}
|
1203
|
+
open_brackets = []
|
1204
|
+
pos = 0
|
1205
|
+
length = len(str_to_parse)
|
1206
|
+
|
1207
|
+
while pos < length:
|
1208
|
+
char = str_to_parse[pos]
|
1209
|
+
|
1210
|
+
if char == "\\":
|
1211
|
+
pos += 2 # Skip escaped chars
|
1212
|
+
continue
|
1213
|
+
|
1214
|
+
if char == '"':
|
1215
|
+
pos += 1
|
1216
|
+
# skip string content
|
1217
|
+
while pos < length:
|
1218
|
+
if str_to_parse[pos] == "\\":
|
1219
|
+
pos += 2
|
1220
|
+
continue
|
1221
|
+
if str_to_parse[pos] == '"':
|
1222
|
+
pos += 1
|
1223
|
+
break
|
1224
|
+
pos += 1
|
1225
|
+
continue
|
1226
|
+
|
1227
|
+
if char in brackets:
|
1228
|
+
open_brackets.append(brackets[char])
|
1229
|
+
elif char in brackets.values():
|
1230
|
+
if not open_brackets:
|
1231
|
+
# Extra closing bracket
|
1232
|
+
# Better to raise error than guess
|
1233
|
+
raise ValueError("Extra closing bracket found.")
|
1234
|
+
if open_brackets[-1] != char:
|
1235
|
+
# Mismatched bracket
|
1236
|
+
raise ValueError("Mismatched brackets.")
|
1237
|
+
open_brackets.pop()
|
1238
|
+
|
1239
|
+
pos += 1
|
1240
|
+
|
1241
|
+
# Add missing closing brackets if any
|
1242
|
+
if open_brackets:
|
1243
|
+
str_to_parse += "".join(reversed(open_brackets))
|
1244
|
+
|
1245
|
+
return str_to_parse
|
1246
|
+
|
1247
|
+
|
1248
|
+
class XMLParser:
|
1249
|
+
def __init__(self, xml_string: str):
|
1250
|
+
self.xml_string = xml_string.strip()
|
1251
|
+
self.index = 0
|
1252
|
+
|
1253
|
+
def parse(self) -> dict[str, Any]:
|
1254
|
+
"""Parse the XML string and return the root element as a dictionary."""
|
1255
|
+
return self._parse_element()
|
1256
|
+
|
1257
|
+
def _parse_element(self) -> dict[str, Any]:
|
1258
|
+
"""Parse a single XML element and its children."""
|
1259
|
+
self._skip_whitespace()
|
1260
|
+
if self.xml_string[self.index] != "<":
|
1261
|
+
raise ValueError(
|
1262
|
+
f"Expected '<', found '{self.xml_string[self.index]}'"
|
1263
|
+
)
|
1090
1264
|
|
1091
|
-
|
1265
|
+
tag, attributes = self._parse_opening_tag()
|
1266
|
+
children: dict[str, str | list | dict] = {}
|
1267
|
+
text = ""
|
1268
|
+
|
1269
|
+
while self.index < len(self.xml_string):
|
1270
|
+
self._skip_whitespace()
|
1271
|
+
if self.xml_string.startswith("</", self.index):
|
1272
|
+
closing_tag = self._parse_closing_tag()
|
1273
|
+
if closing_tag != tag:
|
1274
|
+
raise ValueError(
|
1275
|
+
f"Mismatched tags: '{tag}' and '{closing_tag}'"
|
1276
|
+
)
|
1277
|
+
break
|
1278
|
+
elif self.xml_string.startswith("<", self.index):
|
1279
|
+
child = self._parse_element()
|
1280
|
+
child_tag, child_data = next(iter(child.items()))
|
1281
|
+
if child_tag in children:
|
1282
|
+
if not isinstance(children[child_tag], list):
|
1283
|
+
children[child_tag] = [children[child_tag]]
|
1284
|
+
children[child_tag].append(child_data)
|
1285
|
+
else:
|
1286
|
+
children[child_tag] = child_data
|
1287
|
+
else:
|
1288
|
+
text += self._parse_text()
|
1289
|
+
|
1290
|
+
result: dict[str, Any] = {}
|
1291
|
+
if attributes:
|
1292
|
+
result["@attributes"] = attributes
|
1293
|
+
if children:
|
1294
|
+
result.update(children)
|
1295
|
+
elif text.strip():
|
1296
|
+
result = text.strip()
|
1297
|
+
|
1298
|
+
return {tag: result}
|
1299
|
+
|
1300
|
+
def _parse_opening_tag(self) -> tuple[str, dict[str, str]]:
|
1301
|
+
"""Parse an opening XML tag and its attributes."""
|
1302
|
+
match = re.match(
|
1303
|
+
r'<(\w+)((?:\s+\w+="[^"]*")*)\s*/?>',
|
1304
|
+
self.xml_string[self.index :], # noqa
|
1305
|
+
)
|
1306
|
+
if not match:
|
1307
|
+
raise ValueError("Invalid opening tag")
|
1308
|
+
self.index += match.end()
|
1309
|
+
tag = match.group(1)
|
1310
|
+
attributes = dict(re.findall(r'(\w+)="([^"]*)"', match.group(2)))
|
1311
|
+
return tag, attributes
|
1312
|
+
|
1313
|
+
def _parse_closing_tag(self) -> str:
|
1314
|
+
"""Parse a closing XML tag."""
|
1315
|
+
match = re.match(r"</(\w+)>", self.xml_string[self.index :]) # noqa
|
1316
|
+
if not match:
|
1317
|
+
raise ValueError("Invalid closing tag")
|
1318
|
+
self.index += match.end()
|
1319
|
+
return match.group(1)
|
1320
|
+
|
1321
|
+
def _parse_text(self) -> str:
|
1322
|
+
"""Parse text content between XML tags."""
|
1323
|
+
start = self.index
|
1324
|
+
while (
|
1325
|
+
self.index < len(self.xml_string)
|
1326
|
+
and self.xml_string[self.index] != "<"
|
1327
|
+
):
|
1328
|
+
self.index += 1
|
1329
|
+
return self.xml_string[start : self.index] # noqa
|
1330
|
+
|
1331
|
+
def _skip_whitespace(self) -> None:
|
1332
|
+
"""Skip any whitespace characters at the current parsing position."""
|
1333
|
+
p_ = len(self.xml_string[self.index :]) # noqa
|
1334
|
+
m_ = len(self.xml_string[self.index :].lstrip()) # noqa
|
1335
|
+
|
1336
|
+
self.index += p_ - m_
|
1092
1337
|
|
1093
1338
|
|
1094
1339
|
def xml_to_dict(
|
@@ -1116,20 +1361,31 @@ def xml_to_dict(
|
|
1116
1361
|
Raises:
|
1117
1362
|
ValueError: If the XML is malformed or parsing fails.
|
1118
1363
|
"""
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1364
|
+
try:
|
1365
|
+
a = XMLParser(xml_string).parse()
|
1366
|
+
if remove_root and (root_tag or "root") in a:
|
1367
|
+
a = a[root_tag or "root"]
|
1368
|
+
return a
|
1369
|
+
except ValueError as e:
|
1370
|
+
if not suppress:
|
1371
|
+
raise e
|
1127
1372
|
|
1128
1373
|
|
1129
1374
|
def dict_to_xml(data: dict, /, root_tag: str = "root") -> str:
|
1130
|
-
from lionagi.libs.parse.xml_parser import dict_to_xml
|
1131
1375
|
|
1132
|
-
|
1376
|
+
root = ET.Element(root_tag)
|
1377
|
+
|
1378
|
+
def convert(dict_obj: dict, parent: Any) -> None:
|
1379
|
+
for key, val in dict_obj.items():
|
1380
|
+
if isinstance(val, dict):
|
1381
|
+
element = ET.SubElement(parent, key)
|
1382
|
+
convert(dict_obj=val, parent=element)
|
1383
|
+
else:
|
1384
|
+
element = ET.SubElement(parent, key)
|
1385
|
+
element.text = str(object=val)
|
1386
|
+
|
1387
|
+
convert(dict_obj=data, parent=root)
|
1388
|
+
return ET.tostring(root, encoding="unicode")
|
1133
1389
|
|
1134
1390
|
|
1135
1391
|
def to_dict(
|
@@ -1177,23 +1433,285 @@ def to_dict(
|
|
1177
1433
|
>>> to_dict({"a": {"b": {"c": 1}}}, recursive=True, max_recursive_depth=2)
|
1178
1434
|
{'a': {'b': {'c': 1}}}
|
1179
1435
|
"""
|
1180
|
-
from lionagi.libs.parse.to_dict import to_dict
|
1181
1436
|
|
1182
|
-
|
1437
|
+
try:
|
1438
|
+
if recursive:
|
1439
|
+
input_ = recursive_to_dict(
|
1440
|
+
input_,
|
1441
|
+
use_model_dump=use_model_dump,
|
1442
|
+
fuzzy_parse=fuzzy_parse,
|
1443
|
+
str_type=str_type,
|
1444
|
+
parser=parser,
|
1445
|
+
max_recursive_depth=max_recursive_depth,
|
1446
|
+
recursive_custom_types=not recursive_python_only,
|
1447
|
+
use_enum_values=use_enum_values,
|
1448
|
+
**kwargs,
|
1449
|
+
)
|
1450
|
+
|
1451
|
+
return _to_dict(
|
1452
|
+
input_,
|
1453
|
+
fuzzy_parse=fuzzy_parse,
|
1454
|
+
parser=parser,
|
1455
|
+
str_type=str_type,
|
1456
|
+
use_model_dump=use_model_dump,
|
1457
|
+
use_enum_values=use_enum_values,
|
1458
|
+
**kwargs,
|
1459
|
+
)
|
1460
|
+
except Exception as e:
|
1461
|
+
if suppress or input_ == "":
|
1462
|
+
return {}
|
1463
|
+
raise e
|
1464
|
+
|
1465
|
+
|
1466
|
+
def recursive_to_dict(
|
1467
|
+
input_: Any,
|
1468
|
+
/,
|
1469
|
+
*,
|
1470
|
+
max_recursive_depth: int = None,
|
1471
|
+
recursive_custom_types: bool = False,
|
1472
|
+
**kwargs: Any,
|
1473
|
+
) -> Any:
|
1474
|
+
|
1475
|
+
if not isinstance(max_recursive_depth, int):
|
1476
|
+
max_recursive_depth = 5
|
1477
|
+
else:
|
1478
|
+
if max_recursive_depth < 0:
|
1479
|
+
raise ValueError(
|
1480
|
+
"max_recursive_depth must be a non-negative integer"
|
1481
|
+
)
|
1482
|
+
if max_recursive_depth == 0:
|
1483
|
+
return input_
|
1484
|
+
if max_recursive_depth > 10:
|
1485
|
+
raise ValueError(
|
1486
|
+
"max_recursive_depth must be less than or equal to 10"
|
1487
|
+
)
|
1488
|
+
|
1489
|
+
return _recur_to_dict(
|
1183
1490
|
input_,
|
1184
|
-
use_model_dump=use_model_dump,
|
1185
|
-
fuzzy_parse=fuzzy_parse,
|
1186
|
-
suppress=suppress,
|
1187
|
-
str_type=str_type,
|
1188
|
-
parser=parser,
|
1189
|
-
recursive=recursive,
|
1190
1491
|
max_recursive_depth=max_recursive_depth,
|
1191
|
-
|
1192
|
-
|
1492
|
+
current_depth=0,
|
1493
|
+
recursive_custom_types=recursive_custom_types,
|
1193
1494
|
**kwargs,
|
1194
1495
|
)
|
1195
1496
|
|
1196
1497
|
|
1498
|
+
def _recur_to_dict(
|
1499
|
+
input_: Any,
|
1500
|
+
/,
|
1501
|
+
*,
|
1502
|
+
max_recursive_depth: int,
|
1503
|
+
current_depth: int = 0,
|
1504
|
+
recursive_custom_types: bool = False,
|
1505
|
+
**kwargs: Any,
|
1506
|
+
) -> Any:
|
1507
|
+
|
1508
|
+
if current_depth >= max_recursive_depth:
|
1509
|
+
return input_
|
1510
|
+
|
1511
|
+
if isinstance(input_, str):
|
1512
|
+
try:
|
1513
|
+
# Attempt to parse the string
|
1514
|
+
parsed = _to_dict(input_, **kwargs)
|
1515
|
+
# Recursively process the parsed result
|
1516
|
+
return _recur_to_dict(
|
1517
|
+
parsed,
|
1518
|
+
max_recursive_depth=max_recursive_depth,
|
1519
|
+
current_depth=current_depth + 1,
|
1520
|
+
recursive_custom_types=recursive_custom_types,
|
1521
|
+
**kwargs,
|
1522
|
+
)
|
1523
|
+
except Exception:
|
1524
|
+
# Return the original string if parsing fails
|
1525
|
+
return input_
|
1526
|
+
|
1527
|
+
elif isinstance(input_, dict):
|
1528
|
+
# Recursively process dictionary values
|
1529
|
+
return {
|
1530
|
+
key: _recur_to_dict(
|
1531
|
+
value,
|
1532
|
+
max_recursive_depth=max_recursive_depth,
|
1533
|
+
current_depth=current_depth + 1,
|
1534
|
+
recursive_custom_types=recursive_custom_types,
|
1535
|
+
**kwargs,
|
1536
|
+
)
|
1537
|
+
for key, value in input_.items()
|
1538
|
+
}
|
1539
|
+
|
1540
|
+
elif isinstance(input_, (list, tuple, set)):
|
1541
|
+
# Recursively process list or tuple elements
|
1542
|
+
processed = [
|
1543
|
+
_recur_to_dict(
|
1544
|
+
element,
|
1545
|
+
max_recursive_depth=max_recursive_depth,
|
1546
|
+
current_depth=current_depth + 1,
|
1547
|
+
recursive_custom_types=recursive_custom_types,
|
1548
|
+
**kwargs,
|
1549
|
+
)
|
1550
|
+
for element in input_
|
1551
|
+
]
|
1552
|
+
return type(input_)(processed)
|
1553
|
+
|
1554
|
+
elif isinstance(input_, type) and issubclass(input_, Enum):
|
1555
|
+
try:
|
1556
|
+
obj_dict = _to_dict(input_, **kwargs)
|
1557
|
+
return _recur_to_dict(
|
1558
|
+
obj_dict,
|
1559
|
+
max_recursive_depth=max_recursive_depth,
|
1560
|
+
current_depth=current_depth + 1,
|
1561
|
+
**kwargs,
|
1562
|
+
)
|
1563
|
+
except Exception:
|
1564
|
+
return input_
|
1565
|
+
|
1566
|
+
elif recursive_custom_types:
|
1567
|
+
# Process custom classes if enabled
|
1568
|
+
try:
|
1569
|
+
obj_dict = _to_dict(input_, **kwargs)
|
1570
|
+
return _recur_to_dict(
|
1571
|
+
obj_dict,
|
1572
|
+
max_recursive_depth=max_recursive_depth,
|
1573
|
+
current_depth=current_depth + 1,
|
1574
|
+
recursive_custom_types=recursive_custom_types,
|
1575
|
+
**kwargs,
|
1576
|
+
)
|
1577
|
+
except Exception:
|
1578
|
+
return input_
|
1579
|
+
|
1580
|
+
else:
|
1581
|
+
# Return the input as is for other data types
|
1582
|
+
return input_
|
1583
|
+
|
1584
|
+
|
1585
|
+
def _enum_to_dict(input_, /, use_enum_values: bool = True):
|
1586
|
+
dict_ = dict(input_.__members__).copy()
|
1587
|
+
if use_enum_values:
|
1588
|
+
return {key: value.value for key, value in dict_.items()}
|
1589
|
+
return dict_
|
1590
|
+
|
1591
|
+
|
1592
|
+
def _str_to_dict(
|
1593
|
+
input_: str,
|
1594
|
+
/,
|
1595
|
+
fuzzy_parse: bool = False,
|
1596
|
+
str_type: Literal["json", "xml"] | None = "json",
|
1597
|
+
parser: Callable[[str], Any] | None = None,
|
1598
|
+
remove_root: bool = False,
|
1599
|
+
root_tag: str = "root",
|
1600
|
+
**kwargs: Any,
|
1601
|
+
):
|
1602
|
+
"""
|
1603
|
+
kwargs for parser
|
1604
|
+
"""
|
1605
|
+
if not parser:
|
1606
|
+
if str_type == "xml" and not parser:
|
1607
|
+
parser = partial(
|
1608
|
+
xml_to_dict, remove_root=remove_root, root_tag=root_tag
|
1609
|
+
)
|
1610
|
+
|
1611
|
+
elif fuzzy_parse:
|
1612
|
+
parser = fuzzy_parse_json
|
1613
|
+
else:
|
1614
|
+
parser = json.loads
|
1615
|
+
|
1616
|
+
return parser(input_, **kwargs)
|
1617
|
+
|
1618
|
+
|
1619
|
+
def _na_to_dict(input_: type[None] | UndefinedType | PydanticUndefinedType, /):
|
1620
|
+
return {}
|
1621
|
+
|
1622
|
+
|
1623
|
+
def _model_to_dict(input_: Any, /, use_model_dump=True, **kwargs):
|
1624
|
+
"""
|
1625
|
+
kwargs: built-in serialization methods kwargs
|
1626
|
+
accepted built-in serialization methods:
|
1627
|
+
- mdoel_dump
|
1628
|
+
- to_dict
|
1629
|
+
- to_json
|
1630
|
+
- dict
|
1631
|
+
- json
|
1632
|
+
"""
|
1633
|
+
|
1634
|
+
if use_model_dump and hasattr(input_, "model_dump"):
|
1635
|
+
return input_.model_dump(**kwargs)
|
1636
|
+
|
1637
|
+
methods = (
|
1638
|
+
"to_dict",
|
1639
|
+
"to_json",
|
1640
|
+
"json",
|
1641
|
+
"dict",
|
1642
|
+
)
|
1643
|
+
for method in methods:
|
1644
|
+
if hasattr(input_, method):
|
1645
|
+
result = getattr(input_, method)(**kwargs)
|
1646
|
+
return json.loads(result) if isinstance(result, str) else result
|
1647
|
+
|
1648
|
+
if hasattr(input_, "__dict__"):
|
1649
|
+
return input_.__dict__
|
1650
|
+
|
1651
|
+
try:
|
1652
|
+
return dict(input_)
|
1653
|
+
except Exception as e:
|
1654
|
+
raise ValueError(f"Unable to convert input to dictionary: {e}")
|
1655
|
+
|
1656
|
+
|
1657
|
+
def _set_to_dict(input_: set, /) -> dict:
|
1658
|
+
return {v: v for v in input_}
|
1659
|
+
|
1660
|
+
|
1661
|
+
def _iterable_to_dict(input_: Iterable, /) -> dict:
|
1662
|
+
return {idx: v for idx, v in enumerate(input_)}
|
1663
|
+
|
1664
|
+
|
1665
|
+
def _to_dict(
|
1666
|
+
input_: Any,
|
1667
|
+
/,
|
1668
|
+
*,
|
1669
|
+
fuzzy_parse: bool = False,
|
1670
|
+
str_type: Literal["json", "xml"] | None = "json",
|
1671
|
+
parser: Callable[[str], Any] | None = None,
|
1672
|
+
remove_root: bool = False,
|
1673
|
+
root_tag: str = "root",
|
1674
|
+
use_model_dump: bool = True,
|
1675
|
+
use_enum_values: bool = True,
|
1676
|
+
**kwargs: Any,
|
1677
|
+
) -> dict[str, Any]:
|
1678
|
+
|
1679
|
+
if isinstance(input_, set):
|
1680
|
+
return _set_to_dict(input_)
|
1681
|
+
|
1682
|
+
if isinstance(input_, type) and issubclass(input_, Enum):
|
1683
|
+
return _enum_to_dict(input_, use_enum_values=use_enum_values)
|
1684
|
+
|
1685
|
+
if isinstance(input_, Mapping):
|
1686
|
+
return dict(input_)
|
1687
|
+
|
1688
|
+
if isinstance(input_, type(None) | UndefinedType | PydanticUndefinedType):
|
1689
|
+
return _na_to_dict(input_)
|
1690
|
+
|
1691
|
+
if isinstance(input_, str):
|
1692
|
+
return _str_to_dict(
|
1693
|
+
input_,
|
1694
|
+
fuzzy_parse=fuzzy_parse,
|
1695
|
+
str_type=str_type,
|
1696
|
+
parser=parser,
|
1697
|
+
remove_root=remove_root,
|
1698
|
+
root_tag=root_tag,
|
1699
|
+
**kwargs,
|
1700
|
+
)
|
1701
|
+
|
1702
|
+
if isinstance(input_, BaseModel) or not isinstance(input_, Sequence):
|
1703
|
+
return _model_to_dict(input_, use_model_dump=use_model_dump, **kwargs)
|
1704
|
+
|
1705
|
+
if isinstance(input_, Iterable):
|
1706
|
+
return _iterable_to_dict(input_)
|
1707
|
+
|
1708
|
+
return dict(input_)
|
1709
|
+
|
1710
|
+
|
1711
|
+
# Precompile the regex for extracting JSON code blocks
|
1712
|
+
_JSON_BLOCK_PATTERN = re.compile(r"```json\s*(.*?)\s*```", re.DOTALL)
|
1713
|
+
|
1714
|
+
|
1197
1715
|
def to_json(
|
1198
1716
|
input_data: str | list[str], /, *, fuzzy_parse: bool = False
|
1199
1717
|
) -> dict[str, Any] | list[dict[str, Any]]:
|
@@ -1213,12 +1731,38 @@ def to_json(
|
|
1213
1731
|
- If multiple JSON objects are found: returns a list of dicts.
|
1214
1732
|
- If no valid JSON found: returns an empty list.
|
1215
1733
|
"""
|
1216
|
-
from lionagi.libs.parse.to_json import to_json
|
1217
1734
|
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1221
|
-
|
1735
|
+
# If input_data is a list, join into a single string
|
1736
|
+
if isinstance(input_data, list):
|
1737
|
+
input_str = "\n".join(input_data)
|
1738
|
+
else:
|
1739
|
+
input_str = input_data
|
1740
|
+
|
1741
|
+
# 1. Try direct parsing
|
1742
|
+
try:
|
1743
|
+
if fuzzy_parse:
|
1744
|
+
return fuzzy_parse_json(input_str)
|
1745
|
+
return json.loads(input_str)
|
1746
|
+
except Exception:
|
1747
|
+
pass
|
1748
|
+
|
1749
|
+
# 2. Attempt extracting JSON blocks from markdown
|
1750
|
+
matches = _JSON_BLOCK_PATTERN.findall(input_str)
|
1751
|
+
if not matches:
|
1752
|
+
return []
|
1753
|
+
|
1754
|
+
# If only one match, return single dict; if multiple, return list of dicts
|
1755
|
+
if len(matches) == 1:
|
1756
|
+
data_str = matches[0]
|
1757
|
+
return (
|
1758
|
+
fuzzy_parse_json(data_str) if fuzzy_parse else json.loads(data_str)
|
1759
|
+
)
|
1760
|
+
|
1761
|
+
# Multiple matches
|
1762
|
+
if fuzzy_parse:
|
1763
|
+
return [fuzzy_parse_json(m) for m in matches]
|
1764
|
+
else:
|
1765
|
+
return [json.loads(m) for m in matches]
|
1222
1766
|
|
1223
1767
|
|
1224
1768
|
def get_bins(input_: list[str], upper: int) -> list[list[int]]:
|
@@ -1434,17 +1978,71 @@ def to_num(
|
|
1434
1978
|
ValueError: For invalid input or out of bounds values.
|
1435
1979
|
TypeError: For invalid input types or invalid type conversions.
|
1436
1980
|
"""
|
1437
|
-
|
1981
|
+
# Validate input
|
1982
|
+
if isinstance(input_, (list, tuple)):
|
1983
|
+
raise TypeError("Input cannot be a sequence")
|
1984
|
+
|
1985
|
+
# Handle boolean input
|
1986
|
+
if isinstance(input_, bool):
|
1987
|
+
return validate_num_type(num_type)(input_)
|
1988
|
+
|
1989
|
+
# Handle direct numeric input
|
1990
|
+
if isinstance(input_, (int, float, complex, Decimal)):
|
1991
|
+
inferred_type = type(input_)
|
1992
|
+
if isinstance(input_, Decimal):
|
1993
|
+
inferred_type = float
|
1994
|
+
value = float(input_) if not isinstance(input_, complex) else input_
|
1995
|
+
value = apply_bounds(value, upper_bound, lower_bound)
|
1996
|
+
value = apply_precision(value, precision)
|
1997
|
+
return convert_type(value, validate_num_type(num_type), inferred_type)
|
1998
|
+
|
1999
|
+
# Convert input to string and extract numbers
|
2000
|
+
input_str = str(input_)
|
2001
|
+
number_matches = extract_numbers(input_str)
|
2002
|
+
|
2003
|
+
if not number_matches:
|
2004
|
+
raise ValueError(f"No valid numbers found in: {input_str}")
|
2005
|
+
|
2006
|
+
# Process numbers
|
2007
|
+
results = []
|
2008
|
+
target_type = validate_num_type(num_type)
|
1438
2009
|
|
1439
|
-
|
1440
|
-
|
1441
|
-
|
1442
|
-
|
1443
|
-
num_type=num_type,
|
1444
|
-
precision=precision,
|
1445
|
-
num_count=num_count,
|
2010
|
+
number_matches = (
|
2011
|
+
number_matches[:num_count]
|
2012
|
+
if num_count < len(number_matches)
|
2013
|
+
else number_matches
|
1446
2014
|
)
|
1447
2015
|
|
2016
|
+
for type_and_value in number_matches:
|
2017
|
+
try:
|
2018
|
+
# Infer appropriate type
|
2019
|
+
inferred_type = infer_type(type_and_value)
|
2020
|
+
|
2021
|
+
# Parse to numeric value
|
2022
|
+
value = parse_number(type_and_value)
|
2023
|
+
|
2024
|
+
# Apply bounds if not complex
|
2025
|
+
value = apply_bounds(value, upper_bound, lower_bound)
|
2026
|
+
|
2027
|
+
# Apply precision
|
2028
|
+
value = apply_precision(value, precision)
|
2029
|
+
|
2030
|
+
# Convert to target type if different from inferred
|
2031
|
+
value = convert_type(value, target_type, inferred_type)
|
2032
|
+
|
2033
|
+
results.append(value)
|
2034
|
+
|
2035
|
+
except Exception as e:
|
2036
|
+
if len(type_and_value) == 2:
|
2037
|
+
raise type(e)(
|
2038
|
+
f"Error processing {type_and_value[1]}: {str(e)}"
|
2039
|
+
)
|
2040
|
+
raise type(e)(f"Error processing {type_and_value}: {str(e)}")
|
2041
|
+
|
2042
|
+
if results and num_count == 1:
|
2043
|
+
return results[0]
|
2044
|
+
return results
|
2045
|
+
|
1448
2046
|
|
1449
2047
|
def extract_numbers(text: str) -> list[tuple[str, str]]:
|
1450
2048
|
"""Extract numeric values from text using ordered regex patterns.
|
@@ -1455,9 +2053,19 @@ def extract_numbers(text: str) -> list[tuple[str, str]]:
|
|
1455
2053
|
Returns:
|
1456
2054
|
List of tuples containing (pattern_type, matched_value).
|
1457
2055
|
"""
|
1458
|
-
|
2056
|
+
combined_pattern = "|".join(PATTERNS.values())
|
2057
|
+
matches = re.finditer(combined_pattern, text, re.IGNORECASE)
|
2058
|
+
numbers = []
|
1459
2059
|
|
1460
|
-
|
2060
|
+
for match in matches:
|
2061
|
+
value = match.group()
|
2062
|
+
# Check which pattern matched
|
2063
|
+
for pattern_name, pattern in PATTERNS.items():
|
2064
|
+
if re.fullmatch(pattern, value, re.IGNORECASE):
|
2065
|
+
numbers.append((pattern_name, value))
|
2066
|
+
break
|
2067
|
+
|
2068
|
+
return numbers
|
1461
2069
|
|
1462
2070
|
|
1463
2071
|
def validate_num_type(num_type: NUM_TYPES) -> type:
|
@@ -1472,9 +2080,14 @@ def validate_num_type(num_type: NUM_TYPES) -> type:
|
|
1472
2080
|
Raises:
|
1473
2081
|
ValueError: If the type specification is invalid.
|
1474
2082
|
"""
|
1475
|
-
|
2083
|
+
if isinstance(num_type, str):
|
2084
|
+
if num_type not in TYPE_MAP:
|
2085
|
+
raise ValueError(f"Invalid number type: {num_type}")
|
2086
|
+
return TYPE_MAP[num_type]
|
1476
2087
|
|
1477
|
-
|
2088
|
+
if num_type not in (int, float, complex):
|
2089
|
+
raise ValueError(f"Invalid number type: {num_type}")
|
2090
|
+
return num_type
|
1478
2091
|
|
1479
2092
|
|
1480
2093
|
def infer_type(value: tuple[str, str]) -> type:
|
@@ -1486,9 +2099,10 @@ def infer_type(value: tuple[str, str]) -> type:
|
|
1486
2099
|
Returns:
|
1487
2100
|
The inferred Python type.
|
1488
2101
|
"""
|
1489
|
-
|
1490
|
-
|
1491
|
-
|
2102
|
+
pattern_type, _ = value
|
2103
|
+
if pattern_type in ("complex", "complex_sci", "pure_imaginary"):
|
2104
|
+
return complex
|
2105
|
+
return float
|
1492
2106
|
|
1493
2107
|
|
1494
2108
|
def convert_special(value: str) -> float:
|
@@ -1500,9 +2114,10 @@ def convert_special(value: str) -> float:
|
|
1500
2114
|
Returns:
|
1501
2115
|
The converted float value.
|
1502
2116
|
"""
|
1503
|
-
|
1504
|
-
|
1505
|
-
|
2117
|
+
value = value.lower()
|
2118
|
+
if "infinity" in value or "inf" in value:
|
2119
|
+
return float("-inf") if value.startswith("-") else float("inf")
|
2120
|
+
return float("nan")
|
1506
2121
|
|
1507
2122
|
|
1508
2123
|
def convert_percentage(value: str) -> float:
|
@@ -1517,9 +2132,10 @@ def convert_percentage(value: str) -> float:
|
|
1517
2132
|
Raises:
|
1518
2133
|
ValueError: If the percentage value is invalid.
|
1519
2134
|
"""
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
2135
|
+
try:
|
2136
|
+
return float(value.rstrip("%")) / 100
|
2137
|
+
except ValueError as e:
|
2138
|
+
raise ValueError(f"Invalid percentage value: {value}") from e
|
1523
2139
|
|
1524
2140
|
|
1525
2141
|
def convert_complex(value: str) -> complex:
    """Convert a complex-number string to a ``complex`` value.

    Handles bare/pure imaginary forms ("j", "-j", "2.5j") that the builtin
    ``complex()`` constructor does not accept uniformly, then falls back to
    ``complex()`` with internal spaces removed.

    Args:
        value: The string to convert, e.g. ``"3+4j"`` or ``"2j"``.

    Returns:
        The converted complex value.

    Raises:
        ValueError: If the complex number is invalid.
    """
    try:
        if value.endswith(("j", "J")):
            # Unit imaginary shorthands.
            if value in ("j", "J", "+j", "+J"):
                return complex(0, 1)
            if value in ("-j", "-J"):
                return complex(0, -1)
            # Pure imaginary number (no real part, no interior sign).
            if "+" not in value and "-" not in value[1:]:
                return complex(0, float(value[:-1] or "1"))

        return complex(value.replace(" ", ""))
    except ValueError as exc:
        raise ValueError(f"Invalid complex number: {value}") from exc
|
1542
2172
|
def convert_type(
|
@@ -1557,13 +2187,19 @@ def convert_type(
|
|
1557
2187
|
Raises:
|
1558
2188
|
TypeError: If the conversion is not possible.
|
1559
2189
|
"""
|
1560
|
-
|
2190
|
+
try:
|
2191
|
+
# If no specific type requested, use inferred type
|
2192
|
+
if target_type is float and inferred_type is complex:
|
2193
|
+
return value
|
1561
2194
|
|
1562
|
-
|
1563
|
-
value
|
1564
|
-
|
1565
|
-
|
1566
|
-
)
|
2195
|
+
# Handle explicit type conversions
|
2196
|
+
if target_type is int and isinstance(value, complex):
|
2197
|
+
raise TypeError("Cannot convert complex number to int")
|
2198
|
+
return target_type(value)
|
2199
|
+
except (ValueError, TypeError) as e:
|
2200
|
+
raise TypeError(
|
2201
|
+
f"Cannot convert {value} to {target_type.__name__}"
|
2202
|
+
) from e
|
1567
2203
|
|
1568
2204
|
|
1569
2205
|
def apply_bounds(
|
@@ -1584,13 +2220,14 @@ def apply_bounds(
|
|
1584
2220
|
Raises:
|
1585
2221
|
ValueError: If the value is outside bounds.
|
1586
2222
|
"""
|
1587
|
-
|
2223
|
+
if isinstance(value, complex):
|
2224
|
+
return value
|
1588
2225
|
|
1589
|
-
|
1590
|
-
value
|
1591
|
-
|
1592
|
-
lower_bound
|
1593
|
-
|
2226
|
+
if upper_bound is not None and value > upper_bound:
|
2227
|
+
raise ValueError(f"Value {value} exceeds upper bound {upper_bound}")
|
2228
|
+
if lower_bound is not None and value < lower_bound:
|
2229
|
+
raise ValueError(f"Value {value} below lower bound {lower_bound}")
|
2230
|
+
return value
|
1594
2231
|
|
1595
2232
|
|
1596
2233
|
def apply_precision(
|
@@ -1606,9 +2243,11 @@ def apply_precision(
|
|
1606
2243
|
Returns:
|
1607
2244
|
The rounded value.
|
1608
2245
|
"""
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
2246
|
+
if precision is None or isinstance(value, complex):
|
2247
|
+
return value
|
2248
|
+
if isinstance(value, float):
|
2249
|
+
return round(value, precision)
|
2250
|
+
return value
|
1612
2251
|
|
1613
2252
|
|
1614
2253
|
def parse_number(type_and_value: tuple[str, str]) -> float | complex:
|
@@ -1623,32 +2262,113 @@ def parse_number(type_and_value: tuple[str, str]) -> float | complex:
|
|
1623
2262
|
Raises:
|
1624
2263
|
ValueError: If parsing fails.
|
1625
2264
|
"""
|
1626
|
-
|
1627
|
-
|
1628
|
-
|
2265
|
+
num_type, value = type_and_value
|
2266
|
+
value = value.strip()
|
2267
|
+
|
2268
|
+
try:
|
2269
|
+
if num_type == "special":
|
2270
|
+
return convert_special(value)
|
2271
|
+
|
2272
|
+
if num_type == "percentage":
|
2273
|
+
return convert_percentage(value)
|
2274
|
+
|
2275
|
+
if num_type == "fraction":
|
2276
|
+
if "/" not in value:
|
2277
|
+
raise ValueError(f"Invalid fraction: {value}")
|
2278
|
+
if value.count("/") > 1:
|
2279
|
+
raise ValueError(f"Invalid fraction: {value}")
|
2280
|
+
num, denom = value.split("/")
|
2281
|
+
if not (num.strip("-").isdigit() and denom.isdigit()):
|
2282
|
+
raise ValueError(f"Invalid fraction: {value}")
|
2283
|
+
denom_val = float(denom)
|
2284
|
+
if denom_val == 0:
|
2285
|
+
raise ValueError("Division by zero")
|
2286
|
+
return float(num) / denom_val
|
2287
|
+
if num_type in ("complex", "complex_sci", "pure_imaginary"):
|
2288
|
+
return convert_complex(value)
|
2289
|
+
if num_type == "scientific":
|
2290
|
+
if "e" not in value.lower():
|
2291
|
+
raise ValueError(f"Invalid scientific notation: {value}")
|
2292
|
+
parts = value.lower().split("e")
|
2293
|
+
if len(parts) != 2:
|
2294
|
+
raise ValueError(f"Invalid scientific notation: {value}")
|
2295
|
+
if not (parts[1].lstrip("+-").isdigit()):
|
2296
|
+
raise ValueError(f"Invalid scientific notation: {value}")
|
2297
|
+
return float(value)
|
2298
|
+
if num_type == "decimal":
|
2299
|
+
return float(value)
|
2300
|
+
|
2301
|
+
raise ValueError(f"Unknown number type: {num_type}")
|
2302
|
+
except Exception as e:
|
2303
|
+
# Preserve the specific error type but wrap with more context
|
2304
|
+
raise type(e)(f"Failed to parse {value} as {num_type}: {str(e)}")
|
1629
2305
|
|
1630
2306
|
|
1631
2307
|
def breakdown_pydantic_annotation(
    model: type[B], max_depth: int | None = None, current_depth: int = 0
) -> dict[str, Any]:
    """Recursively expand a Pydantic model's annotations into a plain dict.

    Nested model fields are expanded into sub-dicts; ``list[Model]`` fields
    become single-element lists of the expanded model; every other annotation
    is passed through unchanged.

    Args:
        model: The Pydantic model class to break down.
        max_depth: Optional recursion limit; ``None`` means unbounded.
        current_depth: Depth of the current call (internal bookkeeping).

    Returns:
        Mapping of field name to expanded annotation.

    Raises:
        TypeError: If ``model`` is not a Pydantic model class.
        RecursionError: If ``max_depth`` is reached.
    """
    if not _is_pydantic_model(model):
        raise TypeError("Input must be a Pydantic model")

    if max_depth is not None and current_depth >= max_depth:
        raise RecursionError("Maximum recursion depth reached")

    result: dict[str, Any] = {}
    for field_name, annotation in model.__annotations__.items():
        if _is_pydantic_model(annotation):
            result[field_name] = breakdown_pydantic_annotation(
                annotation, max_depth, current_depth + 1
            )
            continue

        if get_origin(annotation) is list:
            type_args = get_args(annotation)
            if type_args and _is_pydantic_model(type_args[0]):
                result[field_name] = [
                    breakdown_pydantic_annotation(
                        type_args[0], max_depth, current_depth + 1
                    )
                ]
            else:
                result[field_name] = [type_args[0] if type_args else Any]
            continue

        result[field_name] = annotation

    return result
|
2339
|
+
|
2340
|
+
def _is_pydantic_model(x: Any) -> bool:
|
2341
|
+
try:
|
2342
|
+
return isclass(x) and issubclass(x, BaseModel)
|
2343
|
+
except TypeError:
|
2344
|
+
return False
|
1643
2345
|
|
1644
2346
|
|
1645
2347
|
def run_package_manager_command(
    args: Sequence[str],
) -> subprocess.CompletedProcess[bytes]:
    """Run a package manager command, using uv if available, otherwise falling back to pip."""
    command = list(args)
    # Prefer uv when it is present on PATH.
    uv_path = shutil.which("uv")

    if uv_path:
        try:
            return subprocess.run(
                [uv_path, *command],
                check=True,
                capture_output=True,
            )
        except subprocess.CalledProcessError:
            # uv exists but the command failed; retry with pip below.
            print("uv command failed, falling back to pip...")

    return subprocess.run(
        [sys.executable, "-m", "pip", *command],
        check=True,
        capture_output=True,
    )
|
1653
2373
|
|
1654
2374
|
def check_import(
|
@@ -1674,15 +2394,34 @@ def check_import(
|
|
1674
2394
|
ImportError: If the package is not found and not installed.
|
1675
2395
|
ValueError: If the import fails after installation attempt.
|
1676
2396
|
"""
|
1677
|
-
|
2397
|
+
if not is_import_installed(package_name):
|
2398
|
+
if attempt_install:
|
2399
|
+
logging.info(
|
2400
|
+
f"Package {package_name} not found. Attempting " "to install.",
|
2401
|
+
)
|
2402
|
+
try:
|
2403
|
+
return install_import(
|
2404
|
+
package_name=package_name,
|
2405
|
+
module_name=module_name,
|
2406
|
+
import_name=import_name,
|
2407
|
+
pip_name=pip_name,
|
2408
|
+
)
|
2409
|
+
except ImportError as e:
|
2410
|
+
raise ValueError(
|
2411
|
+
f"Failed to install {package_name}: {e}"
|
2412
|
+
) from e
|
2413
|
+
else:
|
2414
|
+
logging.info(
|
2415
|
+
f"Package {package_name} not found. {error_message}",
|
2416
|
+
)
|
2417
|
+
raise ImportError(
|
2418
|
+
f"Package {package_name} not found. {error_message}",
|
2419
|
+
)
|
1678
2420
|
|
1679
|
-
return
|
2421
|
+
return import_module(
|
1680
2422
|
package_name=package_name,
|
1681
2423
|
module_name=module_name,
|
1682
2424
|
import_name=import_name,
|
1683
|
-
pip_name=pip_name,
|
1684
|
-
attempt_install=attempt_install,
|
1685
|
-
error_message=error_message,
|
1686
2425
|
)
|
1687
2426
|
|
1688
2427
|
|
@@ -1703,13 +2442,31 @@ def import_module(
|
|
1703
2442
|
Raises:
|
1704
2443
|
ImportError: If the module cannot be imported.
|
1705
2444
|
"""
|
1706
|
-
|
2445
|
+
try:
|
2446
|
+
full_import_path = (
|
2447
|
+
f"{package_name}.{module_name}" if module_name else package_name
|
2448
|
+
)
|
1707
2449
|
|
1708
|
-
|
1709
|
-
|
1710
|
-
|
1711
|
-
|
1712
|
-
|
2450
|
+
if import_name:
|
2451
|
+
import_name = (
|
2452
|
+
[import_name]
|
2453
|
+
if not isinstance(import_name, list)
|
2454
|
+
else import_name
|
2455
|
+
)
|
2456
|
+
a = __import__(
|
2457
|
+
full_import_path,
|
2458
|
+
fromlist=import_name,
|
2459
|
+
)
|
2460
|
+
if len(import_name) == 1:
|
2461
|
+
return getattr(a, import_name[0])
|
2462
|
+
return [getattr(a, name) for name in import_name]
|
2463
|
+
else:
|
2464
|
+
return __import__(full_import_path)
|
2465
|
+
|
2466
|
+
except ImportError as e:
|
2467
|
+
raise ImportError(
|
2468
|
+
f"Failed to import module {full_import_path}: {e}"
|
2469
|
+
) from e
|
1713
2470
|
|
1714
2471
|
|
1715
2472
|
def install_import(
|
@@ -1731,14 +2488,29 @@ def install_import(
|
|
1731
2488
|
ImportError: If the package cannot be imported or installed.
|
1732
2489
|
subprocess.CalledProcessError: If pip installation fails.
|
1733
2490
|
"""
|
1734
|
-
|
2491
|
+
pip_name = pip_name or package_name
|
1735
2492
|
|
1736
|
-
|
1737
|
-
|
1738
|
-
|
1739
|
-
|
1740
|
-
|
1741
|
-
|
2493
|
+
try:
|
2494
|
+
return import_module(
|
2495
|
+
package_name=package_name,
|
2496
|
+
module_name=module_name,
|
2497
|
+
import_name=import_name,
|
2498
|
+
)
|
2499
|
+
except ImportError:
|
2500
|
+
logging.info(f"Installing {pip_name}...")
|
2501
|
+
try:
|
2502
|
+
run_package_manager_command(["install", pip_name])
|
2503
|
+
return import_module(
|
2504
|
+
package_name=package_name,
|
2505
|
+
module_name=module_name,
|
2506
|
+
import_name=import_name,
|
2507
|
+
)
|
2508
|
+
except subprocess.CalledProcessError as e:
|
2509
|
+
raise ImportError(f"Failed to install {pip_name}: {e}") from e
|
2510
|
+
except ImportError as e:
|
2511
|
+
raise ImportError(
|
2512
|
+
f"Failed to import {pip_name} after installation: {e}"
|
2513
|
+
) from e
|
1742
2514
|
|
1743
2515
|
|
1744
2516
|
def is_import_installed(package_name: str) -> bool:
|
@@ -1755,9 +2527,23 @@ def is_import_installed(package_name: str) -> bool:
|
|
1755
2527
|
|
1756
2528
|
|
1757
2529
|
def read_image_to_base64(image_path: str | Path) -> str:
    """Read an image from disk and return it as a base64-encoded string.

    The image is re-encoded with OpenCV using the file's own extension
    before encoding to base64.

    Args:
        image_path: Path to the image file.

    Returns:
        Base64-encoded string of the (re-encoded) image bytes.

    Raises:
        ValueError: If the image cannot be read or re-encoded.
    """
    import base64

    import cv2

    image_path = str(image_path)
    # Fix: cv2.imread's second argument is an ImreadModes flag. The previous
    # code passed cv2.COLOR_BGR2RGB — a ColorConversionCodes constant — which
    # imread silently reinterpreted as an unrelated read flag. Reading with
    # the default IMREAD_COLOR gives the conventional BGR image expected by
    # cv2.imencode below.
    image = cv2.imread(image_path)

    if image is None:
        raise ValueError(f"Could not read image from path: {image_path}")

    file_extension = "." + image_path.split(".")[-1]

    success, buffer = cv2.imencode(file_extension, image)
    if not success:
        raise ValueError(f"Could not encode image to {file_extension} format.")
    return base64.b64encode(buffer).decode("utf-8")
|
1762
2548
|
|
1763
2549
|
def pdf_to_images(
|
@@ -1775,11 +2561,23 @@ def pdf_to_images(
|
|
1775
2561
|
Returns:
|
1776
2562
|
list: A list of file paths for the saved images.
|
1777
2563
|
"""
|
1778
|
-
|
2564
|
+
import os
|
1779
2565
|
|
1780
|
-
|
1781
|
-
|
1782
|
-
output_folder=output_folder,
|
1783
|
-
dpi=dpi,
|
1784
|
-
fmt=fmt,
|
2566
|
+
convert_from_path = check_import(
|
2567
|
+
"pdf2image", import_name="convert_from_path"
|
1785
2568
|
)
|
2569
|
+
|
2570
|
+
# Ensure the output folder exists
|
2571
|
+
os.makedirs(output_folder, exist_ok=True)
|
2572
|
+
|
2573
|
+
# Convert PDF to a list of PIL Image objects
|
2574
|
+
images = convert_from_path(pdf_path, dpi=dpi)
|
2575
|
+
|
2576
|
+
saved_paths = []
|
2577
|
+
for i, image in enumerate(images):
|
2578
|
+
# Construct the output file name
|
2579
|
+
image_file = os.path.join(output_folder, f"page_{i+1}.{fmt}")
|
2580
|
+
image.save(image_file, fmt.upper())
|
2581
|
+
saved_paths.append(image_file)
|
2582
|
+
|
2583
|
+
return saved_paths
|