lionagi 0.14.8__py3-none-any.whl → 0.14.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. lionagi/_errors.py +120 -11
  2. lionagi/_types.py +0 -6
  3. lionagi/config.py +3 -1
  4. lionagi/fields/reason.py +1 -1
  5. lionagi/libs/concurrency/throttle.py +79 -0
  6. lionagi/libs/parse.py +2 -1
  7. lionagi/libs/unstructured/__init__.py +0 -0
  8. lionagi/libs/unstructured/pdf_to_image.py +45 -0
  9. lionagi/libs/unstructured/read_image_to_base64.py +33 -0
  10. lionagi/libs/validate/to_num.py +378 -0
  11. lionagi/libs/validate/xml_parser.py +203 -0
  12. lionagi/models/operable_model.py +8 -3
  13. lionagi/operations/flow.py +0 -1
  14. lionagi/protocols/generic/event.py +2 -0
  15. lionagi/protocols/generic/log.py +26 -10
  16. lionagi/protocols/operatives/step.py +1 -1
  17. lionagi/protocols/types.py +9 -1
  18. lionagi/service/__init__.py +22 -1
  19. lionagi/service/connections/api_calling.py +57 -2
  20. lionagi/service/connections/endpoint_config.py +1 -1
  21. lionagi/service/connections/header_factory.py +4 -2
  22. lionagi/service/connections/match_endpoint.py +10 -10
  23. lionagi/service/connections/providers/anthropic_.py +5 -2
  24. lionagi/service/connections/providers/claude_code_.py +13 -17
  25. lionagi/service/connections/providers/claude_code_cli.py +51 -16
  26. lionagi/service/connections/providers/exa_.py +5 -3
  27. lionagi/service/connections/providers/oai_.py +116 -81
  28. lionagi/service/connections/providers/ollama_.py +38 -18
  29. lionagi/service/connections/providers/perplexity_.py +36 -14
  30. lionagi/service/connections/providers/types.py +30 -0
  31. lionagi/service/hooks/__init__.py +25 -0
  32. lionagi/service/hooks/_types.py +52 -0
  33. lionagi/service/hooks/_utils.py +85 -0
  34. lionagi/service/hooks/hook_event.py +67 -0
  35. lionagi/service/hooks/hook_registry.py +221 -0
  36. lionagi/service/imodel.py +120 -34
  37. lionagi/service/third_party/claude_code.py +715 -0
  38. lionagi/service/third_party/openai_model_names.py +198 -0
  39. lionagi/service/third_party/pplx_models.py +16 -8
  40. lionagi/service/types.py +21 -0
  41. lionagi/session/branch.py +1 -4
  42. lionagi/tools/base.py +1 -3
  43. lionagi/tools/file/reader.py +1 -1
  44. lionagi/tools/memory/tools.py +2 -2
  45. lionagi/utils.py +12 -775
  46. lionagi/version.py +1 -1
  47. {lionagi-0.14.8.dist-info → lionagi-0.14.10.dist-info}/METADATA +6 -2
  48. {lionagi-0.14.8.dist-info → lionagi-0.14.10.dist-info}/RECORD +50 -40
  49. lionagi/service/connections/providers/_claude_code/__init__.py +0 -3
  50. lionagi/service/connections/providers/_claude_code/models.py +0 -244
  51. lionagi/service/connections/providers/_claude_code/stream_cli.py +0 -359
  52. lionagi/service/third_party/openai_models.py +0 -18241
  53. {lionagi-0.14.8.dist-info → lionagi-0.14.10.dist-info}/WHEEL +0 -0
  54. {lionagi-0.14.8.dist-info → lionagi-0.14.10.dist-info}/licenses/LICENSE +0 -0
lionagi/utils.py CHANGED
@@ -7,7 +7,6 @@ import contextlib
7
7
  import copy as _copy
8
8
  import dataclasses
9
9
  import functools
10
- import importlib.metadata
11
10
  import importlib.util
12
11
  import json
13
12
  import logging
@@ -15,9 +14,7 @@ import re
15
14
  import shutil
16
15
  import subprocess
17
16
  import sys
18
- import time as t_
19
17
  import uuid
20
- import xml.etree.ElementTree as ET
21
18
  from abc import ABC
22
19
  from collections.abc import (
23
20
  AsyncGenerator,
@@ -28,7 +25,6 @@ from collections.abc import (
28
25
  )
29
26
  from concurrent.futures import ThreadPoolExecutor
30
27
  from datetime import datetime, timezone
31
- from decimal import Decimal
32
28
  from enum import Enum
33
29
  from functools import lru_cache, partial
34
30
  from inspect import isclass
@@ -44,11 +40,12 @@ from typing import (
44
40
  )
45
41
 
46
42
  import anyio
47
- from pydantic import BaseModel, model_validator
43
+ from pydantic import BaseModel
48
44
  from pydantic_core import PydanticUndefinedType
49
45
 
50
46
  from .libs.concurrency import Lock as ConcurrencyLock
51
47
  from .libs.concurrency import Semaphore, create_task_group
48
+ from .libs.validate.xml_parser import xml_to_dict
52
49
  from .settings import Settings
53
50
 
54
51
  R = TypeVar("R")
@@ -90,15 +87,21 @@ __all__ = (
90
87
  "throttle",
91
88
  "max_concurrent",
92
89
  "force_async",
93
- "to_num",
94
90
  "breakdown_pydantic_annotation",
95
91
  "run_package_manager_command",
92
+ "StringEnum",
96
93
  )
97
94
 
98
95
 
99
96
  # --- General Global Utilities Types ---
100
97
 
101
98
 
99
+ class StringEnum(str, Enum):
100
+ @classmethod
101
+ def allowed(cls) -> tuple[str, ...]:
102
+ return tuple(e.value for e in cls)
103
+
104
+
102
105
  class UndefinedType:
103
106
  def __init__(self) -> None:
104
107
  self.undefined = True
@@ -139,7 +142,7 @@ def hash_dict(data) -> int:
139
142
 
140
143
  class Params(BaseModel):
141
144
  def keys(self):
142
- return self.model_fields.keys()
145
+ return type(self).model_fields.keys()
143
146
 
144
147
  def __call__(self, *args, **kwargs):
145
148
  raise NotImplementedError(
@@ -467,24 +470,6 @@ def to_list(
467
470
  return processed
468
471
 
469
472
 
470
- class ToListParams(Params):
471
- flatten: bool = False
472
- dropna: bool = False
473
- unique: bool = False
474
- use_values: bool = False
475
- flatten_tuple_set: bool = False
476
-
477
- def __call__(self, input_: Any):
478
- return to_list(
479
- input_,
480
- flatten=self.flatten,
481
- dropna=self.dropna,
482
- unique=self.unique,
483
- use_values=self.use_values,
484
- flatten_tuple_set=self.flatten_tuple_set,
485
- )
486
-
487
-
488
473
  def lcall(
489
474
  input_: Iterable[T] | T,
490
475
  func: Callable[[T], R] | Iterable[Callable[[T], R]],
@@ -592,29 +577,6 @@ def lcall(
592
577
  return out
593
578
 
594
579
 
595
- class CallParams(Params):
596
- """params class for high order function with additional handling of lower order function parameters, can take arbitrary number of args and kwargs, args need to be in agrs=, kwargs can be passed as is"""
597
-
598
- args: list = []
599
- kwargs: dict = {}
600
-
601
- @model_validator(mode="before")
602
- def _validate_data(cls, data: dict):
603
- _d = {}
604
- for k in list(data.keys()):
605
- if k in cls.keys():
606
- _d[k] = data.pop(k)
607
- _d.setdefault("args", [])
608
- _d.setdefault("kwargs", {})
609
- _d["kwargs"].update(data)
610
- return _d
611
-
612
- def __call__(self, *args, **kwargs):
613
- raise NotImplementedError(
614
- "This method should be implemented in a subclass"
615
- )
616
-
617
-
618
580
  async def alcall(
619
581
  input_: list[Any],
620
582
  func: Callable[..., T],
@@ -927,92 +889,6 @@ def create_path(
927
889
  return full_path
928
890
 
929
891
 
930
- class CreatePathParams(Params):
931
- directory: Path | str
932
- filename: str
933
- extension: str = None
934
- timestamp: bool = False
935
- dir_exist_ok: bool = True
936
- file_exist_ok: bool = False
937
- time_prefix: bool = False
938
- timestamp_format: str | None = None
939
- random_hash_digits: int = 0
940
-
941
- def __call__(
942
- self, directory: Path | str = None, filename: str = None
943
- ) -> Path:
944
- return create_path(
945
- directory or self.directory,
946
- filename or self.filename,
947
- extension=self.extension,
948
- timestamp=self.timestamp,
949
- dir_exist_ok=self.dir_exist_ok,
950
- file_exist_ok=self.file_exist_ok,
951
- time_prefix=self.time_prefix,
952
- timestamp_format=self.timestamp_format,
953
- random_hash_digits=self.random_hash_digits,
954
- )
955
-
956
-
957
- # --- JSON and XML Conversion ---
958
-
959
-
960
- def to_xml(
961
- obj: dict | list | str | int | float | bool | None,
962
- root_name: str = "root",
963
- ) -> str:
964
- """
965
- Convert a dictionary into an XML formatted string.
966
-
967
- Rules:
968
- - A dictionary key becomes an XML tag.
969
- - If the dictionary value is:
970
- - A primitive type (str, int, float, bool, None): it becomes the text content of the tag.
971
- - A list: each element of the list will repeat the same tag.
972
- - Another dictionary: it is recursively converted to nested XML.
973
- - root_name sets the top-level XML element name.
974
-
975
- Args:
976
- obj: The Python object to convert (typically a dictionary).
977
- root_name: The name of the root XML element.
978
-
979
- Returns:
980
- A string representing the XML.
981
-
982
- Examples:
983
- >>> to_xml({"a": 1, "b": {"c": "hello", "d": [10, 20]}}, root_name="data")
984
- '<data><a>1</a><b><c>hello</c><d>10</d><d>20</d></b></data>'
985
- """
986
-
987
- def _convert(value: Any, tag_name: str) -> str:
988
- # If value is a dict, recursively convert its keys
989
- if isinstance(value, dict):
990
- inner = "".join(_convert(v, k) for k, v in value.items())
991
- return f"<{tag_name}>{inner}</{tag_name}>"
992
- # If value is a list, repeat the same tag for each element
993
- elif isinstance(value, list):
994
- return "".join(_convert(item, tag_name) for item in value)
995
- # If value is a primitive, convert to string and place inside tag
996
- else:
997
- text = "" if value is None else str(value)
998
- # Escape special XML characters if needed (minimal)
999
- text = (
1000
- text.replace("&", "&amp;")
1001
- .replace("<", "&lt;")
1002
- .replace(">", "&gt;")
1003
- .replace('"', "&quot;")
1004
- .replace("'", "&apos;")
1005
- )
1006
- return f"<{tag_name}>{text}</{tag_name}>"
1007
-
1008
- # If top-level obj is not a dict, wrap it in one
1009
- if not isinstance(obj, dict):
1010
- obj = {root_name: obj}
1011
-
1012
- inner_xml = "".join(_convert(v, k) for k, v in obj.items())
1013
- return f"<{root_name}>{inner_xml}</{root_name}>"
1014
-
1015
-
1016
892
  def fuzzy_parse_json(
1017
893
  str_to_parse: str, /
1018
894
  ) -> dict[str, Any] | list[dict[str, Any]]:
@@ -1126,148 +1002,6 @@ def fix_json_string(str_to_parse: str, /) -> str:
1126
1002
  return str_to_parse
1127
1003
 
1128
1004
 
1129
- class XMLParser:
1130
- def __init__(self, xml_string: str):
1131
- self.xml_string = xml_string.strip()
1132
- self.index = 0
1133
-
1134
- def parse(self) -> dict[str, Any]:
1135
- """Parse the XML string and return the root element as a dictionary."""
1136
- return self._parse_element()
1137
-
1138
- def _parse_element(self) -> dict[str, Any]:
1139
- """Parse a single XML element and its children."""
1140
- self._skip_whitespace()
1141
- if self.xml_string[self.index] != "<":
1142
- raise ValueError(
1143
- f"Expected '<', found '{self.xml_string[self.index]}'"
1144
- )
1145
-
1146
- tag, attributes = self._parse_opening_tag()
1147
- children: dict[str, str | list | dict] = {}
1148
- text = ""
1149
-
1150
- while self.index < len(self.xml_string):
1151
- self._skip_whitespace()
1152
- if self.xml_string.startswith("</", self.index):
1153
- closing_tag = self._parse_closing_tag()
1154
- if closing_tag != tag:
1155
- raise ValueError(
1156
- f"Mismatched tags: '{tag}' and '{closing_tag}'"
1157
- )
1158
- break
1159
- elif self.xml_string.startswith("<", self.index):
1160
- child = self._parse_element()
1161
- child_tag, child_data = next(iter(child.items()))
1162
- if child_tag in children:
1163
- if not isinstance(children[child_tag], list):
1164
- children[child_tag] = [children[child_tag]]
1165
- children[child_tag].append(child_data)
1166
- else:
1167
- children[child_tag] = child_data
1168
- else:
1169
- text += self._parse_text()
1170
-
1171
- result: dict[str, Any] = {}
1172
- if attributes:
1173
- result["@attributes"] = attributes
1174
- if children:
1175
- result.update(children)
1176
- elif text.strip():
1177
- result = text.strip()
1178
-
1179
- return {tag: result}
1180
-
1181
- def _parse_opening_tag(self) -> tuple[str, dict[str, str]]:
1182
- """Parse an opening XML tag and its attributes."""
1183
- match = re.match(
1184
- r'<(\w+)((?:\s+\w+="[^"]*")*)\s*/?>',
1185
- self.xml_string[self.index :], # noqa
1186
- )
1187
- if not match:
1188
- raise ValueError("Invalid opening tag")
1189
- self.index += match.end()
1190
- tag = match.group(1)
1191
- attributes = dict(re.findall(r'(\w+)="([^"]*)"', match.group(2)))
1192
- return tag, attributes
1193
-
1194
- def _parse_closing_tag(self) -> str:
1195
- """Parse a closing XML tag."""
1196
- match = re.match(r"</(\w+)>", self.xml_string[self.index :]) # noqa
1197
- if not match:
1198
- raise ValueError("Invalid closing tag")
1199
- self.index += match.end()
1200
- return match.group(1)
1201
-
1202
- def _parse_text(self) -> str:
1203
- """Parse text content between XML tags."""
1204
- start = self.index
1205
- while (
1206
- self.index < len(self.xml_string)
1207
- and self.xml_string[self.index] != "<"
1208
- ):
1209
- self.index += 1
1210
- return self.xml_string[start : self.index] # noqa
1211
-
1212
- def _skip_whitespace(self) -> None:
1213
- """Skip any whitespace characters at the current parsing position."""
1214
- p_ = len(self.xml_string[self.index :]) # noqa
1215
- m_ = len(self.xml_string[self.index :].lstrip()) # noqa
1216
-
1217
- self.index += p_ - m_
1218
-
1219
-
1220
- def xml_to_dict(
1221
- xml_string: str,
1222
- /,
1223
- suppress=False,
1224
- remove_root: bool = True,
1225
- root_tag: str = None,
1226
- ) -> dict[str, Any]:
1227
- """
1228
- Parse an XML string into a nested dictionary structure.
1229
-
1230
- This function converts an XML string into a dictionary where:
1231
- - Element tags become dictionary keys
1232
- - Text content is assigned directly to the tag key if there are no children
1233
- - Attributes are stored in a '@attributes' key
1234
- - Multiple child elements with the same tag are stored as lists
1235
-
1236
- Args:
1237
- xml_string: The XML string to parse.
1238
-
1239
- Returns:
1240
- A dictionary representation of the XML structure.
1241
-
1242
- Raises:
1243
- ValueError: If the XML is malformed or parsing fails.
1244
- """
1245
- try:
1246
- a = XMLParser(xml_string).parse()
1247
- if remove_root and (root_tag or "root") in a:
1248
- a = a[root_tag or "root"]
1249
- return a
1250
- except ValueError as e:
1251
- if not suppress:
1252
- raise e
1253
-
1254
-
1255
- def dict_to_xml(data: dict, /, root_tag: str = "root") -> str:
1256
- root = ET.Element(root_tag)
1257
-
1258
- def convert(dict_obj: dict, parent: Any) -> None:
1259
- for key, val in dict_obj.items():
1260
- if isinstance(val, dict):
1261
- element = ET.SubElement(parent, key)
1262
- convert(dict_obj=val, parent=element)
1263
- else:
1264
- element = ET.SubElement(parent, key)
1265
- element.text = str(object=val)
1266
-
1267
- convert(dict_obj=data, parent=root)
1268
- return ET.tostring(root, encoding="unicode")
1269
-
1270
-
1271
1005
  def to_dict(
1272
1006
  input_: Any,
1273
1007
  /,
@@ -1671,74 +1405,6 @@ def get_bins(input_: list[str], upper: int) -> list[list[int]]:
1671
1405
  return bins
1672
1406
 
1673
1407
 
1674
- class Throttle:
1675
- """
1676
- Provide a throttling mechanism for function calls.
1677
-
1678
- When used as a decorator, it ensures that the decorated function can only
1679
- be called once per specified period. Subsequent calls within this period
1680
- are delayed to enforce this constraint.
1681
-
1682
- Attributes:
1683
- period: The minimum time period (in seconds) between successive calls.
1684
- """
1685
-
1686
- def __init__(self, period: float) -> None:
1687
- """
1688
- Initialize a new instance of Throttle.
1689
-
1690
- Args:
1691
- period: The minimum time period (in seconds) between
1692
- successive calls.
1693
- """
1694
- self.period = period
1695
- self.last_called = 0
1696
-
1697
- def __call__(self, func: Callable[..., T]) -> Callable[..., T]:
1698
- """
1699
- Decorate a synchronous function with the throttling mechanism.
1700
-
1701
- Args:
1702
- func: The synchronous function to be throttled.
1703
-
1704
- Returns:
1705
- The throttled synchronous function.
1706
- """
1707
-
1708
- @functools.wraps(func)
1709
- def wrapper(*args, **kwargs) -> Any:
1710
- elapsed = time() - self.last_called
1711
- if elapsed < self.period:
1712
- t_.sleep(self.period - elapsed)
1713
- self.last_called = time()
1714
- return func(*args, **kwargs)
1715
-
1716
- return wrapper
1717
-
1718
- def __call_async__(
1719
- self, func: Callable[..., Callable[..., T]]
1720
- ) -> Callable[..., Callable[..., T]]:
1721
- """
1722
- Decorate an asynchronous function with the throttling mechanism.
1723
-
1724
- Args:
1725
- func: The asynchronous function to be throttled.
1726
-
1727
- Returns:
1728
- The throttled asynchronous function.
1729
- """
1730
-
1731
- @functools.wraps(func)
1732
- async def wrapper(*args, **kwargs) -> Any:
1733
- elapsed = time() - self.last_called
1734
- if elapsed < self.period:
1735
- await asyncio.sleep(self.period - elapsed)
1736
- self.last_called = time()
1737
- return await func(*args, **kwargs)
1738
-
1739
- return wrapper
1740
-
1741
-
1742
1408
  def force_async(fn: Callable[..., T]) -> Callable[..., Callable[..., T]]:
1743
1409
  """
1744
1410
  Convert a synchronous function to an asynchronous function
@@ -1773,6 +1439,8 @@ def throttle(
1773
1439
  Returns:
1774
1440
  The throttled function.
1775
1441
  """
1442
+ from lionagi.libs.concurrency.throttle import Throttle
1443
+
1776
1444
  if not is_coro_func(func):
1777
1445
  func = force_async(func)
1778
1446
  throttle_instance = Throttle(period)
@@ -1810,380 +1478,6 @@ def max_concurrent(
1810
1478
  return wrapper
1811
1479
 
1812
1480
 
1813
- # Type definitions
1814
- NUM_TYPE_LITERAL = Literal["int", "float", "complex"]
1815
- NUM_TYPES = type[int] | type[float] | type[complex] | NUM_TYPE_LITERAL
1816
- NumericType = TypeVar("NumericType", int, float, complex)
1817
-
1818
- # Type mapping
1819
- TYPE_MAP = {"int": int, "float": float, "complex": complex}
1820
-
1821
- # Regex patterns for different numeric formats
1822
- PATTERNS = {
1823
- "scientific": r"[-+]?(?:\d*\.)?\d+[eE][-+]?\d+",
1824
- "complex_sci": r"[-+]?(?:\d*\.)?\d+(?:[eE][-+]?\d+)?[-+](?:\d*\.)?\d+(?:[eE][-+]?\d+)?[jJ]",
1825
- "complex": r"[-+]?(?:\d*\.)?\d+[-+](?:\d*\.)?\d+[jJ]",
1826
- "pure_imaginary": r"[-+]?(?:\d*\.)?\d*[jJ]",
1827
- "percentage": r"[-+]?(?:\d*\.)?\d+%",
1828
- "fraction": r"[-+]?\d+/\d+",
1829
- "decimal": r"[-+]?(?:\d*\.)?\d+",
1830
- "special": r"[-+]?(?:inf|infinity|nan)",
1831
- }
1832
-
1833
-
1834
- def to_num(
1835
- input_: Any,
1836
- /,
1837
- *,
1838
- upper_bound: int | float | None = None,
1839
- lower_bound: int | float | None = None,
1840
- num_type: NUM_TYPES = float,
1841
- precision: int | None = None,
1842
- num_count: int = 1,
1843
- ) -> int | float | complex | list[int | float | complex]:
1844
- """Convert input to numeric type(s) with validation and bounds checking.
1845
-
1846
- Args:
1847
- input_value: The input to convert to number(s).
1848
- upper_bound: Maximum allowed value (inclusive).
1849
- lower_bound: Minimum allowed value (inclusive).
1850
- num_type: Target numeric type ('int', 'float', 'complex' or type objects).
1851
- precision: Number of decimal places for rounding (float only).
1852
- num_count: Number of numeric values to extract.
1853
-
1854
- Returns:
1855
- Converted number(s). Single value if num_count=1, else list.
1856
-
1857
- Raises:
1858
- ValueError: For invalid input or out of bounds values.
1859
- TypeError: For invalid input types or invalid type conversions.
1860
- """
1861
- # Validate input
1862
- if isinstance(input_, (list, tuple)):
1863
- raise TypeError("Input cannot be a sequence")
1864
-
1865
- # Handle boolean input
1866
- if isinstance(input_, bool):
1867
- return validate_num_type(num_type)(input_)
1868
-
1869
- # Handle direct numeric input
1870
- if isinstance(input_, (int, float, complex, Decimal)):
1871
- inferred_type = type(input_)
1872
- if isinstance(input_, Decimal):
1873
- inferred_type = float
1874
- value = float(input_) if not isinstance(input_, complex) else input_
1875
- value = apply_bounds(value, upper_bound, lower_bound)
1876
- value = apply_precision(value, precision)
1877
- return convert_type(value, validate_num_type(num_type), inferred_type)
1878
-
1879
- # Convert input to string and extract numbers
1880
- input_str = str(input_)
1881
- number_matches = extract_numbers(input_str)
1882
-
1883
- if not number_matches:
1884
- raise ValueError(f"No valid numbers found in: {input_str}")
1885
-
1886
- # Process numbers
1887
- results = []
1888
- target_type = validate_num_type(num_type)
1889
-
1890
- number_matches = (
1891
- number_matches[:num_count]
1892
- if num_count < len(number_matches)
1893
- else number_matches
1894
- )
1895
-
1896
- for type_and_value in number_matches:
1897
- try:
1898
- # Infer appropriate type
1899
- inferred_type = infer_type(type_and_value)
1900
-
1901
- # Parse to numeric value
1902
- value = parse_number(type_and_value)
1903
-
1904
- # Apply bounds if not complex
1905
- value = apply_bounds(value, upper_bound, lower_bound)
1906
-
1907
- # Apply precision
1908
- value = apply_precision(value, precision)
1909
-
1910
- # Convert to target type if different from inferred
1911
- value = convert_type(value, target_type, inferred_type)
1912
-
1913
- results.append(value)
1914
-
1915
- except Exception as e:
1916
- if len(type_and_value) == 2:
1917
- raise type(e)(
1918
- f"Error processing {type_and_value[1]}: {str(e)}"
1919
- )
1920
- raise type(e)(f"Error processing {type_and_value}: {str(e)}")
1921
-
1922
- if results and num_count == 1:
1923
- return results[0]
1924
- return results
1925
-
1926
-
1927
- def extract_numbers(text: str) -> list[tuple[str, str]]:
1928
- """Extract numeric values from text using ordered regex patterns.
1929
-
1930
- Args:
1931
- text: The text to extract numbers from.
1932
-
1933
- Returns:
1934
- List of tuples containing (pattern_type, matched_value).
1935
- """
1936
- combined_pattern = "|".join(PATTERNS.values())
1937
- matches = re.finditer(combined_pattern, text, re.IGNORECASE)
1938
- numbers = []
1939
-
1940
- for match in matches:
1941
- value = match.group()
1942
- # Check which pattern matched
1943
- for pattern_name, pattern in PATTERNS.items():
1944
- if re.fullmatch(pattern, value, re.IGNORECASE):
1945
- numbers.append((pattern_name, value))
1946
- break
1947
-
1948
- return numbers
1949
-
1950
-
1951
- def validate_num_type(num_type: NUM_TYPES) -> type:
1952
- """Validate and normalize numeric type specification.
1953
-
1954
- Args:
1955
- num_type: The numeric type to validate.
1956
-
1957
- Returns:
1958
- The normalized Python type object.
1959
-
1960
- Raises:
1961
- ValueError: If the type specification is invalid.
1962
- """
1963
- if isinstance(num_type, str):
1964
- if num_type not in TYPE_MAP:
1965
- raise ValueError(f"Invalid number type: {num_type}")
1966
- return TYPE_MAP[num_type]
1967
-
1968
- if num_type not in (int, float, complex):
1969
- raise ValueError(f"Invalid number type: {num_type}")
1970
- return num_type
1971
-
1972
-
1973
- def infer_type(value: tuple[str, str]) -> type:
1974
- """Infer appropriate numeric type from value.
1975
-
1976
- Args:
1977
- value: Tuple of (pattern_type, matched_value).
1978
-
1979
- Returns:
1980
- The inferred Python type.
1981
- """
1982
- pattern_type, _ = value
1983
- if pattern_type in ("complex", "complex_sci", "pure_imaginary"):
1984
- return complex
1985
- return float
1986
-
1987
-
1988
- def convert_special(value: str) -> float:
1989
- """Convert special float values (inf, -inf, nan).
1990
-
1991
- Args:
1992
- value: The string value to convert.
1993
-
1994
- Returns:
1995
- The converted float value.
1996
- """
1997
- value = value.lower()
1998
- if "infinity" in value or "inf" in value:
1999
- return float("-inf") if value.startswith("-") else float("inf")
2000
- return float("nan")
2001
-
2002
-
2003
- def convert_percentage(value: str) -> float:
2004
- """Convert percentage string to float.
2005
-
2006
- Args:
2007
- value: The percentage string to convert.
2008
-
2009
- Returns:
2010
- The converted float value.
2011
-
2012
- Raises:
2013
- ValueError: If the percentage value is invalid.
2014
- """
2015
- try:
2016
- return float(value.rstrip("%")) / 100
2017
- except ValueError as e:
2018
- raise ValueError(f"Invalid percentage value: {value}") from e
2019
-
2020
-
2021
- def convert_complex(value: str) -> complex:
2022
- """Convert complex number string to complex.
2023
-
2024
- Args:
2025
- value: The complex number string to convert.
2026
-
2027
- Returns:
2028
- The converted complex value.
2029
-
2030
- Raises:
2031
- ValueError: If the complex number is invalid.
2032
- """
2033
- try:
2034
- # Handle pure imaginary numbers
2035
- if value.endswith("j") or value.endswith("J"):
2036
- if value in ("j", "J"):
2037
- return complex(0, 1)
2038
- if value in ("+j", "+J"):
2039
- return complex(0, 1)
2040
- if value in ("-j", "-J"):
2041
- return complex(0, -1)
2042
- if "+" not in value and "-" not in value[1:]:
2043
- # Pure imaginary number
2044
- imag = float(value[:-1] or "1")
2045
- return complex(0, imag)
2046
-
2047
- return complex(value.replace(" ", ""))
2048
- except ValueError as e:
2049
- raise ValueError(f"Invalid complex number: {value}") from e
2050
-
2051
-
2052
- def convert_type(
2053
- value: float | complex,
2054
- target_type: type,
2055
- inferred_type: type,
2056
- ) -> int | float | complex:
2057
- """Convert value to target type if specified, otherwise use inferred type.
2058
-
2059
- Args:
2060
- value: The value to convert.
2061
- target_type: The requested target type.
2062
- inferred_type: The inferred type from the value.
2063
-
2064
- Returns:
2065
- The converted value.
2066
-
2067
- Raises:
2068
- TypeError: If the conversion is not possible.
2069
- """
2070
- try:
2071
- # If no specific type requested, use inferred type
2072
- if target_type is float and inferred_type is complex:
2073
- return value
2074
-
2075
- # Handle explicit type conversions
2076
- if target_type is int and isinstance(value, complex):
2077
- raise TypeError("Cannot convert complex number to int")
2078
- return target_type(value)
2079
- except (ValueError, TypeError) as e:
2080
- raise TypeError(
2081
- f"Cannot convert {value} to {target_type.__name__}"
2082
- ) from e
2083
-
2084
-
2085
- def apply_bounds(
2086
- value: float | complex,
2087
- upper_bound: float | None = None,
2088
- lower_bound: float | None = None,
2089
- ) -> float | complex:
2090
- """Apply bounds checking to numeric value.
2091
-
2092
- Args:
2093
- value: The value to check.
2094
- upper_bound: Maximum allowed value (inclusive).
2095
- lower_bound: Minimum allowed value (inclusive).
2096
-
2097
- Returns:
2098
- The validated value.
2099
-
2100
- Raises:
2101
- ValueError: If the value is outside bounds.
2102
- """
2103
- if isinstance(value, complex):
2104
- return value
2105
-
2106
- if upper_bound is not None and value > upper_bound:
2107
- raise ValueError(f"Value {value} exceeds upper bound {upper_bound}")
2108
- if lower_bound is not None and value < lower_bound:
2109
- raise ValueError(f"Value {value} below lower bound {lower_bound}")
2110
- return value
2111
-
2112
-
2113
- def apply_precision(
2114
- value: float | complex,
2115
- precision: int | None,
2116
- ) -> float | complex:
2117
- """Apply precision rounding to numeric value.
2118
-
2119
- Args:
2120
- value: The value to round.
2121
- precision: Number of decimal places.
2122
-
2123
- Returns:
2124
- The rounded value.
2125
- """
2126
- if precision is None or isinstance(value, complex):
2127
- return value
2128
- if isinstance(value, float):
2129
- return round(value, precision)
2130
- return value
2131
-
2132
-
2133
- def parse_number(type_and_value: tuple[str, str]) -> float | complex:
2134
- """Parse string to numeric value based on pattern type.
2135
-
2136
- Args:
2137
- type_and_value: Tuple of (pattern_type, matched_value).
2138
-
2139
- Returns:
2140
- The parsed numeric value.
2141
-
2142
- Raises:
2143
- ValueError: If parsing fails.
2144
- """
2145
- num_type, value = type_and_value
2146
- value = value.strip()
2147
-
2148
- try:
2149
- if num_type == "special":
2150
- return convert_special(value)
2151
-
2152
- if num_type == "percentage":
2153
- return convert_percentage(value)
2154
-
2155
- if num_type == "fraction":
2156
- if "/" not in value:
2157
- raise ValueError(f"Invalid fraction: {value}")
2158
- if value.count("/") > 1:
2159
- raise ValueError(f"Invalid fraction: {value}")
2160
- num, denom = value.split("/")
2161
- if not (num.strip("-").isdigit() and denom.isdigit()):
2162
- raise ValueError(f"Invalid fraction: {value}")
2163
- denom_val = float(denom)
2164
- if denom_val == 0:
2165
- raise ValueError("Division by zero")
2166
- return float(num) / denom_val
2167
- if num_type in ("complex", "complex_sci", "pure_imaginary"):
2168
- return convert_complex(value)
2169
- if num_type == "scientific":
2170
- if "e" not in value.lower():
2171
- raise ValueError(f"Invalid scientific notation: {value}")
2172
- parts = value.lower().split("e")
2173
- if len(parts) != 2:
2174
- raise ValueError(f"Invalid scientific notation: {value}")
2175
- if not (parts[1].lstrip("+-").isdigit()):
2176
- raise ValueError(f"Invalid scientific notation: {value}")
2177
- return float(value)
2178
- if num_type == "decimal":
2179
- return float(value)
2180
-
2181
- raise ValueError(f"Unknown number type: {num_type}")
2182
- except Exception as e:
2183
- # Preserve the specific error type but wrap with more context
2184
- raise type(e)(f"Failed to parse {value} as {num_type}: {str(e)}")
2185
-
2186
-
2187
1481
  def breakdown_pydantic_annotation(
2188
1482
  model: type[B], max_depth: int | None = None, current_depth: int = 0
2189
1483
  ) -> dict[str, Any]:
@@ -2403,60 +1697,3 @@ def is_import_installed(package_name: str) -> bool:
2403
1697
  bool: True if the package is installed, False otherwise.
2404
1698
  """
2405
1699
  return importlib.util.find_spec(package_name) is not None
2406
-
2407
-
2408
- def read_image_to_base64(image_path: str | Path) -> str:
2409
- import base64
2410
-
2411
- import cv2
2412
-
2413
- image_path = str(image_path)
2414
- image = cv2.imread(image_path, cv2.COLOR_BGR2RGB)
2415
-
2416
- if image is None:
2417
- raise ValueError(f"Could not read image from path: {image_path}")
2418
-
2419
- file_extension = "." + image_path.split(".")[-1]
2420
-
2421
- success, buffer = cv2.imencode(file_extension, image)
2422
- if not success:
2423
- raise ValueError(f"Could not encode image to {file_extension} format.")
2424
- encoded_image = base64.b64encode(buffer).decode("utf-8")
2425
- return encoded_image
2426
-
2427
-
2428
- def pdf_to_images(
2429
- pdf_path: str, output_folder: str, dpi: int = 300, fmt: str = "jpeg"
2430
- ) -> list:
2431
- """
2432
- Convert a PDF file into images, one image per page.
2433
-
2434
- Args:
2435
- pdf_path (str): Path to the input PDF file.
2436
- output_folder (str): Directory to save the output images.
2437
- dpi (int): Dots per inch (resolution) for conversion (default: 300).
2438
- fmt (str): Image format (default: 'jpeg'). Use 'png' if preferred.
2439
-
2440
- Returns:
2441
- list: A list of file paths for the saved images.
2442
- """
2443
- import os
2444
-
2445
- convert_from_path = check_import(
2446
- "pdf2image", import_name="convert_from_path"
2447
- )
2448
-
2449
- # Ensure the output folder exists
2450
- os.makedirs(output_folder, exist_ok=True)
2451
-
2452
- # Convert PDF to a list of PIL Image objects
2453
- images = convert_from_path(pdf_path, dpi=dpi)
2454
-
2455
- saved_paths = []
2456
- for i, image in enumerate(images):
2457
- # Construct the output file name
2458
- image_file = os.path.join(output_folder, f"page_{i + 1}.{fmt}")
2459
- image.save(image_file, fmt.upper())
2460
- saved_paths.append(image_file)
2461
-
2462
- return saved_paths