lionagi 0.16.2__py3-none-any.whl → 0.16.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/adapters/_utils.py +0 -14
- lionagi/ln/__init__.py +4 -0
- lionagi/ln/fuzzy/__init__.py +4 -1
- lionagi/ln/fuzzy/_fuzzy_validate.py +109 -0
- lionagi/ln/fuzzy/_to_dict.py +388 -0
- lionagi/models/__init__.py +0 -2
- lionagi/operations/communicate/communicate.py +1 -1
- lionagi/operations/parse/parse.py +1 -1
- lionagi/protocols/generic/pile.py +1 -1
- lionagi/protocols/operatives/operative.py +2 -2
- lionagi/service/connections/match_endpoint.py +2 -10
- lionagi/service/connections/providers/types.py +1 -3
- lionagi/service/hooks/hook_event.py +1 -1
- lionagi/service/hooks/hook_registry.py +1 -1
- lionagi/service/rate_limited_processor.py +1 -1
- lionagi/utils.py +3 -335
- lionagi/version.py +1 -1
- {lionagi-0.16.2.dist-info → lionagi-0.16.3.dist-info}/METADATA +3 -12
- {lionagi-0.16.2.dist-info → lionagi-0.16.3.dist-info}/RECORD +21 -43
- lionagi/adapters/postgres_model_adapter.py +0 -131
- lionagi/libs/concurrency.py +0 -1
- lionagi/libs/nested/__init__.py +0 -3
- lionagi/libs/nested/flatten.py +0 -172
- lionagi/libs/nested/nfilter.py +0 -59
- lionagi/libs/nested/nget.py +0 -45
- lionagi/libs/nested/ninsert.py +0 -104
- lionagi/libs/nested/nmerge.py +0 -158
- lionagi/libs/nested/npop.py +0 -69
- lionagi/libs/nested/nset.py +0 -94
- lionagi/libs/nested/unflatten.py +0 -83
- lionagi/libs/nested/utils.py +0 -189
- lionagi/libs/parse.py +0 -31
- lionagi/libs/schema/json_schema.py +0 -231
- lionagi/libs/unstructured/__init__.py +0 -0
- lionagi/libs/unstructured/pdf_to_image.py +0 -45
- lionagi/libs/unstructured/read_image_to_base64.py +0 -33
- lionagi/libs/validate/fuzzy_match_keys.py +0 -7
- lionagi/libs/validate/fuzzy_validate_mapping.py +0 -144
- lionagi/libs/validate/string_similarity.py +0 -7
- lionagi/libs/validate/xml_parser.py +0 -203
- lionagi/models/note.py +0 -387
- lionagi/service/connections/providers/claude_code_.py +0 -299
- {lionagi-0.16.2.dist-info → lionagi-0.16.3.dist-info}/WHEEL +0 -0
- {lionagi-0.16.2.dist-info → lionagi-0.16.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,45 +0,0 @@
|
|
1
|
-
from lionagi.utils import import_module, is_import_installed
|
2
|
-
|
3
|
-
_HAS_PDF2IMAGE = is_import_installed("pdf2image")
|
4
|
-
|
5
|
-
|
6
|
-
def pdf_to_images(
    pdf_path: str, output_folder: str, dpi: int = 300, fmt: str = "jpeg"
) -> list:
    """
    Convert a PDF file into images, one image per page.

    Args:
        pdf_path (str): Path to the input PDF file.
        output_folder (str): Directory to save the output images. It is
            created (including parents) when it does not exist.
        dpi (int): Dots per inch (resolution) for conversion (default: 300).
        fmt (str): Image format, also used verbatim as the file extension
            (default: 'jpeg'). Use 'png' if preferred. The common extension
            spellings 'jpg' and 'tif' are accepted as well.

    Returns:
        list: File paths of the saved images, in page order
            (``page_1.<fmt>``, ``page_2.<fmt>``, ...).

    Raises:
        ModuleNotFoundError: If pdf2image is not installed.
    """
    if not _HAS_PDF2IMAGE:
        raise ModuleNotFoundError(
            "pdf2image is not installed, please install it with `pip install lionagi[unstructured]`"
        )

    import os

    convert_from_path = import_module(
        "pdf2image", import_name="convert_from_path"
    )

    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Pillow looks up writers by registered format name, not by extension:
    # "JPG"/"TIF" are not registered names and would make save() fail, so
    # translate the usual extension spellings to the names Pillow expects.
    format_name = fmt.upper()
    format_name = {"JPG": "JPEG", "TIF": "TIFF"}.get(format_name, format_name)

    # Convert PDF to a list of PIL Image objects
    images = convert_from_path(pdf_path, dpi=dpi)

    saved_paths = []
    for page_number, image in enumerate(images, start=1):
        # File names are 1-based: page_1.<fmt>, page_2.<fmt>, ...
        image_file = os.path.join(output_folder, f"page_{page_number}.{fmt}")
        image.save(image_file, format_name)
        saved_paths.append(image_file)

    return saved_paths
|
@@ -1,33 +0,0 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
|
3
|
-
from lionagi.utils import is_import_installed
|
4
|
-
|
5
|
-
_HAS_OPENCV = is_import_installed("cv2")
|
6
|
-
|
7
|
-
|
8
|
-
__all__ = ("read_image_to_base64",)
|
9
|
-
|
10
|
-
|
11
|
-
def read_image_to_base64(image_path: str | Path) -> str:
    """
    Read an image from disk and return it re-encoded as a base64 string.

    The image is decoded with OpenCV, re-encoded in the format implied by the
    file extension, and the encoded bytes are base64-encoded.

    Args:
        image_path: Path to the image file.

    Returns:
        The base64 text (UTF-8 decoded) of the re-encoded image.

    Raises:
        ModuleNotFoundError: If OpenCV (``cv2``) is not installed.
        ValueError: If the image cannot be read, the file name has no
            extension to pick an encoder from, or encoding fails.
    """
    if not _HAS_OPENCV:
        raise ModuleNotFoundError(
            "OpenCV is not installed, please install it with `pip install lionagi[unstructured]`"
        )

    import base64

    import cv2

    image_path = str(image_path)

    # The second argument of imread is an IMREAD_* flag. The previous code
    # passed cv2.COLOR_BGR2RGB, which is a cvtColor conversion code; its
    # integer value coincides with IMREAD_ANYCOLOR, so using the proper flag
    # keeps the loading behaviour while stating the intent explicitly.
    image = cv2.imread(image_path, cv2.IMREAD_ANYCOLOR)

    if image is None:
        raise ValueError(f"Could not read image from path: {image_path}")

    # Take the extension from the file name only, so a dot in a directory
    # name (e.g. "v1.2/image") is never mistaken for an extension.
    file_name = Path(image_path).name
    if "." not in file_name:
        raise ValueError(
            f"Cannot determine image format, path has no file extension: {image_path}"
        )
    file_extension = "." + file_name.rsplit(".", 1)[-1]

    success, buffer = cv2.imencode(file_extension, image)
    if not success:
        raise ValueError(f"Could not encode image to {file_extension} format.")
    return base64.b64encode(buffer).decode("utf-8")
|
@@ -1,144 +0,0 @@
|
|
1
|
-
# Copyright (c) 2023 - 2025, HaiyangLi <quantocean.li at gmail dot com>
|
2
|
-
#
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
4
|
-
|
5
|
-
from collections.abc import Callable, Sequence
|
6
|
-
from typing import Any, Literal
|
7
|
-
|
8
|
-
from lionagi.utils import KeysDict, Params, to_dict, to_json
|
9
|
-
|
10
|
-
from .fuzzy_match_keys import fuzzy_match_keys
|
11
|
-
from .string_similarity import SIMILARITY_TYPE
|
12
|
-
|
13
|
-
|
14
|
-
class FuzzyValidateMappingParams(Params):
    """
    Reusable bundle of keyword options for ``fuzzy_validate_mapping``.

    Each field mirrors the keyword argument of the same name (and default)
    on ``fuzzy_validate_mapping``. Calling an instance runs the validation
    on the given input with the stored options.
    """

    # String similarity algorithm name, or a custom scoring callable.
    similarity_algo: SIMILARITY_TYPE | Callable[[str, str], float] = (
        "jaro_winkler"
    )
    # Minimum similarity score for a fuzzy key match.
    similarity_threshold: float = 0.85
    # Whether fuzzy matching is used for key correction.
    fuzzy_match: bool = True
    # Policy for keys that could not be matched.
    handle_unmatched: Literal["ignore", "raise", "remove", "fill", "force"] = (
        "ignore"
    )
    # Default used when filling missing keys.
    fill_value: Any = None
    # Per-key defaults used when filling missing keys.
    fill_mapping: dict[str, Any] | None = None
    # Raise if any expected key is missing.
    strict: bool = False
    # Fall back to an empty dict when the input cannot be converted.
    suppress_conversion_errors: bool = False

    def __call__(
        self, d_: dict[str, Any], keys: Sequence[str] | KeysDict
    ) -> dict[str, Any]:
        """Validate ``d_`` against ``keys`` using the options stored on this instance."""
        options = {
            "similarity_algo": self.similarity_algo,
            "similarity_threshold": self.similarity_threshold,
            "fuzzy_match": self.fuzzy_match,
            "handle_unmatched": self.handle_unmatched,
            "fill_value": self.fill_value,
            "fill_mapping": self.fill_mapping,
            "strict": self.strict,
            "suppress_conversion_errors": self.suppress_conversion_errors,
        }
        return fuzzy_validate_mapping(d_, keys, **options)
|
43
|
-
|
44
|
-
|
45
|
-
def fuzzy_validate_mapping(
    d: Any,
    keys: Sequence[str] | KeysDict,
    /,
    *,
    similarity_algo: (
        SIMILARITY_TYPE | Callable[[str, str], float]
    ) = "jaro_winkler",
    similarity_threshold: float = 0.85,
    fuzzy_match: bool = True,
    handle_unmatched: Literal[
        "ignore", "raise", "remove", "fill", "force"
    ] = "ignore",
    fill_value: Any = None,
    fill_mapping: dict[str, Any] | None = None,
    strict: bool = False,
    suppress_conversion_errors: bool = False,
) -> dict[str, Any]:
    """
    Validate and correct any input into a dictionary with expected keys.

    The input is first converted to a dictionary, then its keys are checked
    and corrected against ``keys`` by ``fuzzy_match_keys``.

    Args:
        d: Input to validate. Can be:
            - Dictionary
            - JSON string or markdown code block
            - Object with to_dict/model_dump method
            - Any type convertible to dictionary
        keys: List of expected keys or dictionary mapping keys to types.
        similarity_algo: String similarity algorithm or custom function.
        similarity_threshold: Minimum similarity score for fuzzy matching.
        fuzzy_match: If True, use fuzzy matching for key correction.
        handle_unmatched: How to handle unmatched keys:
            - "ignore": Keep unmatched keys
            - "raise": Raise error for unmatched keys
            - "remove": Remove unmatched keys
            - "fill": Fill missing keys with default values
            - "force": Combine "fill" and "remove" behaviors
        fill_value: Default value for filling unmatched keys.
        fill_mapping: Dictionary mapping keys to default values.
        strict: Raise error if any expected key is missing.
        suppress_conversion_errors: Use an empty dict instead of raising
            when the input cannot be converted to a dictionary.

    Returns:
        Validated and corrected dictionary.

    Raises:
        TypeError: If ``d`` is None.
        ValueError: If input cannot be converted or validation fails.
    """
    if d is None:
        raise TypeError("Input cannot be None")

    # Step 1: convert the input to a dictionary.
    try:
        if isinstance(d, str):
            # First try to_json for JSON strings and code blocks
            try:
                json_result = to_json(d)
                dict_input = (
                    json_result[0]
                    if isinstance(json_result, list)
                    else json_result
                )
            except Exception:
                # Fall back to to_dict for other string formats
                dict_input = to_dict(
                    d, str_type="json", fuzzy_parse=True, suppress=True
                )
        else:
            dict_input = to_dict(
                d, use_model_dump=True, fuzzy_parse=True, suppress=True
            )
    except Exception as e:
        if not suppress_conversion_errors:
            # Chain the cause so the underlying conversion error stays visible.
            raise ValueError(
                f"Failed to convert input to dictionary: {e}"
            ) from e
        dict_input = {}

    # Step 2: reject non-dict results. This check lives outside the ``try``
    # above so its ValueError is not caught and wrapped a second time (which
    # used to duplicate the "Failed to convert..." prefix in the message).
    if not isinstance(dict_input, dict):
        if not suppress_conversion_errors:
            raise ValueError(
                f"Failed to convert input to dictionary: {type(dict_input)}"
            )
        dict_input = {}

    # Step 3: validate / correct the keys.
    return fuzzy_match_keys(
        dict_input,
        keys,
        similarity_algo=similarity_algo,
        similarity_threshold=similarity_threshold,
        fuzzy_match=fuzzy_match,
        handle_unmatched=handle_unmatched,
        fill_value=fill_value,
        fill_mapping=fill_mapping,
        strict=strict,
    )
|
@@ -1,203 +0,0 @@
|
|
1
|
-
from __future__ import annotations
|
2
|
-
|
3
|
-
import re
|
4
|
-
import xml.etree.ElementTree as ET
|
5
|
-
from typing import Any
|
6
|
-
|
7
|
-
|
8
|
-
def to_xml(
|
9
|
-
obj: dict | list | str | int | float | bool | None,
|
10
|
-
root_name: str = "root",
|
11
|
-
) -> str:
|
12
|
-
"""
|
13
|
-
Convert a dictionary into an XML formatted string.
|
14
|
-
|
15
|
-
Rules:
|
16
|
-
- A dictionary key becomes an XML tag.
|
17
|
-
- If the dictionary value is:
|
18
|
-
- A primitive type (str, int, float, bool, None): it becomes the text content of the tag.
|
19
|
-
- A list: each element of the list will repeat the same tag.
|
20
|
-
- Another dictionary: it is recursively converted to nested XML.
|
21
|
-
- root_name sets the top-level XML element name.
|
22
|
-
|
23
|
-
Args:
|
24
|
-
obj: The Python object to convert (typically a dictionary).
|
25
|
-
root_name: The name of the root XML element.
|
26
|
-
|
27
|
-
Returns:
|
28
|
-
A string representing the XML.
|
29
|
-
|
30
|
-
Examples:
|
31
|
-
>>> to_xml({"a": 1, "b": {"c": "hello", "d": [10, 20]}}, root_name="data")
|
32
|
-
'<data><a>1</a><b><c>hello</c><d>10</d><d>20</d></b></data>'
|
33
|
-
"""
|
34
|
-
|
35
|
-
def _convert(value: Any, tag_name: str) -> str:
|
36
|
-
# If value is a dict, recursively convert its keys
|
37
|
-
if isinstance(value, dict):
|
38
|
-
inner = "".join(_convert(v, k) for k, v in value.items())
|
39
|
-
return f"<{tag_name}>{inner}</{tag_name}>"
|
40
|
-
# If value is a list, repeat the same tag for each element
|
41
|
-
elif isinstance(value, list):
|
42
|
-
return "".join(_convert(item, tag_name) for item in value)
|
43
|
-
# If value is a primitive, convert to string and place inside tag
|
44
|
-
else:
|
45
|
-
text = "" if value is None else str(value)
|
46
|
-
# Escape special XML characters if needed (minimal)
|
47
|
-
text = (
|
48
|
-
text.replace("&", "&")
|
49
|
-
.replace("<", "<")
|
50
|
-
.replace(">", ">")
|
51
|
-
.replace('"', """)
|
52
|
-
.replace("'", "'")
|
53
|
-
)
|
54
|
-
return f"<{tag_name}>{text}</{tag_name}>"
|
55
|
-
|
56
|
-
# If top-level obj is not a dict, wrap it in one
|
57
|
-
if not isinstance(obj, dict):
|
58
|
-
obj = {root_name: obj}
|
59
|
-
|
60
|
-
inner_xml = "".join(_convert(v, k) for k, v in obj.items())
|
61
|
-
return f"<{root_name}>{inner_xml}</{root_name}>"
|
62
|
-
|
63
|
-
|
64
|
-
class XMLParser:
    """
    Minimal recursive-descent parser that turns an XML string into nested dicts.

    Supported syntax is limited to what the patterns below accept: tags made
    of word characters, double-quoted attributes, self-closing tags, and text
    content. Each element is returned as ``{tag: content}`` where content is:

    - a dict of child elements (plus ``"@attributes"`` when attributes are
      present) if the element has children; repeated child tags are
      collected into a list;
    - the stripped text if the element has text and no children (attributes
      are not kept in that case);
    - otherwise a dict holding only ``"@attributes"`` (or an empty dict).
    """

    # Compiled once; matched with an explicit start position so the remaining
    # input is not copied on every parsing step.
    _OPENING_TAG = re.compile(r'<(\w+)((?:\s+\w+="[^"]*")*)\s*/?>')
    _CLOSING_TAG = re.compile(r"</(\w+)>")
    _ATTRIBUTE = re.compile(r'(\w+)="([^"]*)"')

    def __init__(self, xml_string: str):
        self.xml_string = xml_string.strip()
        self.index = 0  # current read position in xml_string

    def parse(self) -> dict[str, Any]:
        """Parse the XML string and return the root element as a dictionary."""
        return self._parse_element()

    def _parse_element(self) -> dict[str, Any]:
        """Parse a single XML element and its children.

        Raises:
            ValueError: On empty input, a missing '<', an invalid tag, or a
                closing tag that does not match the opening tag.
        """
        self._skip_whitespace()
        if self.index >= len(self.xml_string):
            # Raise ValueError (not IndexError) so callers that handle
            # malformed XML via ValueError also cover empty input.
            raise ValueError("Expected '<', found end of input")
        if self.xml_string[self.index] != "<":
            raise ValueError(
                f"Expected '<', found '{self.xml_string[self.index]}'"
            )

        tag, attributes = self._parse_opening_tag()

        # The opening-tag pattern also accepts "<tag ... />". Such an element
        # has no body and no closing tag, so it must be returned right away;
        # otherwise the following siblings would be consumed as its children.
        if self.xml_string.endswith("/>", 0, self.index):
            return {tag: {"@attributes": attributes} if attributes else {}}

        children: dict[str, Any] = {}
        text = ""

        while self.index < len(self.xml_string):
            self._skip_whitespace()
            if self.xml_string.startswith("</", self.index):
                closing_tag = self._parse_closing_tag()
                if closing_tag != tag:
                    raise ValueError(
                        f"Mismatched tags: '{tag}' and '{closing_tag}'"
                    )
                break
            elif self.xml_string.startswith("<", self.index):
                child = self._parse_element()
                child_tag, child_data = next(iter(child.items()))
                if child_tag in children:
                    # Second occurrence of a tag: promote the value to a list.
                    if not isinstance(children[child_tag], list):
                        children[child_tag] = [children[child_tag]]
                    children[child_tag].append(child_data)
                else:
                    children[child_tag] = child_data
            else:
                text += self._parse_text()

        result: Any = {}
        if attributes:
            result["@attributes"] = attributes
        if children:
            result.update(children)
        elif text.strip():
            # Text-only element: the text replaces the dict entirely.
            result = text.strip()

        return {tag: result}

    def _parse_opening_tag(self) -> tuple[str, dict[str, str]]:
        """Parse an opening XML tag and its attributes."""
        match = self._OPENING_TAG.match(self.xml_string, self.index)
        if not match:
            raise ValueError("Invalid opening tag")
        self.index = match.end()
        attributes = dict(self._ATTRIBUTE.findall(match.group(2)))
        return match.group(1), attributes

    def _parse_closing_tag(self) -> str:
        """Parse a closing XML tag."""
        match = self._CLOSING_TAG.match(self.xml_string, self.index)
        if not match:
            raise ValueError("Invalid closing tag")
        self.index = match.end()
        return match.group(1)

    def _parse_text(self) -> str:
        """Parse text content between XML tags."""
        start = self.index
        end = self.xml_string.find("<", start)
        # No further tag: the text runs to the end of the input.
        self.index = len(self.xml_string) if end == -1 else end
        return self.xml_string[start : self.index]  # noqa

    def _skip_whitespace(self) -> None:
        """Skip any whitespace characters at the current parsing position."""
        source = self.xml_string
        position = self.index
        while position < len(source) and source[position].isspace():
            position += 1
        self.index = position
|
153
|
-
|
154
|
-
|
155
|
-
def xml_to_dict(
    xml_string: str,
    /,
    suppress=False,
    remove_root: bool = True,
    root_tag: str | None = None,
) -> dict[str, Any] | None:
    """
    Parse an XML string into a nested dictionary structure.

    This function converts an XML string into a dictionary where:
    - Element tags become dictionary keys
    - Text content is assigned directly to the tag key if there are no children
    - Attributes are stored in a '@attributes' key
    - Multiple child elements with the same tag are stored as lists

    Args:
        xml_string: The XML string to parse.
        suppress: If True, return None instead of raising when the XML
            cannot be parsed.
        remove_root: If True and the top-level tag equals ``root_tag``
            (or "root" when ``root_tag`` is not given), return that
            element's content instead of ``{tag: content}``.
        root_tag: Name of the root tag to unwrap; defaults to "root".

    Returns:
        A dictionary representation of the XML structure (or the unwrapped
        root content), or None when parsing fails and ``suppress`` is True.

    Raises:
        ValueError: If the XML is malformed or parsing fails.
    """
    try:
        parsed = XMLParser(xml_string).parse()
    except (ValueError, IndexError) as e:
        # The parser indexes into the string without a bounds check, so empty
        # input can surface as IndexError; treat it as malformed XML too so
        # that ``suppress`` and the documented ValueError contract both hold.
        if suppress:
            return None
        if isinstance(e, ValueError):
            raise
        raise ValueError(f"Malformed XML: {e}") from e

    root_key = root_tag or "root"
    if remove_root and root_key in parsed:
        return parsed[root_key]
    return parsed
|
188
|
-
|
189
|
-
|
190
|
-
def dict_to_xml(data: dict, /, root_tag: str = "root") -> str:
    """
    Serialize a dictionary to an XML string using ``xml.etree.ElementTree``.

    Rules:
        - Each key becomes a child element of its parent.
        - A dict value is converted recursively into nested elements.
        - A list value repeats the key's tag once per item, instead of
          writing the list's Python repr as text. This is the same
          convention ``xml_to_dict`` reads back as a list.
        - Any other value becomes the element text via ``str(value)``.

    Args:
        data: The dictionary to serialize.
        root_tag: Name of the enclosing root element.

    Returns:
        The XML document as a unicode string.
    """
    root = ET.Element(root_tag)

    def _append(parent: Any, key: str, value: Any) -> None:
        # A list produces one sibling element per item under the same tag.
        if isinstance(value, list):
            for item in value:
                _append(parent, key, item)
            return

        element = ET.SubElement(parent, key)
        if isinstance(value, dict):
            for child_key, child_value in value.items():
                _append(element, child_key, child_value)
        else:
            element.text = str(value)

    for key, value in data.items():
        _append(root, key, value)

    return ET.tostring(root, encoding="unicode")
|