lionagi 0.17.5__py3-none-any.whl → 0.17.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +5 -2
- lionagi/config.py +26 -0
- lionagi/fields/action.py +5 -3
- lionagi/libs/file/chunk.py +3 -14
- lionagi/libs/file/process.py +10 -92
- lionagi/libs/schema/breakdown_pydantic_annotation.py +45 -0
- lionagi/ln/_async_call.py +6 -6
- lionagi/ln/fuzzy/_fuzzy_match.py +3 -6
- lionagi/ln/fuzzy/_fuzzy_validate.py +3 -4
- lionagi/ln/fuzzy/_string_similarity.py +11 -5
- lionagi/ln/fuzzy/_to_dict.py +19 -19
- lionagi/ln/types.py +15 -0
- lionagi/operations/operate/operate.py +7 -11
- lionagi/operations/parse/parse.py +5 -3
- lionagi/protocols/generic/element.py +3 -6
- lionagi/protocols/generic/event.py +1 -1
- lionagi/protocols/mail/package.py +2 -2
- lionagi/protocols/messages/instruction.py +9 -1
- lionagi/protocols/operatives/operative.py +4 -3
- lionagi/service/__init__.py +67 -8
- lionagi/service/broadcaster.py +61 -0
- lionagi/service/connections/api_calling.py +21 -140
- lionagi/service/hooks/__init__.py +2 -10
- lionagi/service/hooks/_types.py +5 -4
- lionagi/service/hooks/hook_registry.py +11 -11
- lionagi/service/hooks/hooked_event.py +142 -0
- lionagi/service/imodel.py +11 -6
- lionagi/session/branch.py +46 -169
- lionagi/session/session.py +1 -44
- lionagi/tools/file/reader.py +6 -4
- lionagi/utils.py +3 -334
- lionagi/version.py +1 -1
- {lionagi-0.17.5.dist-info → lionagi-0.17.7.dist-info}/METADATA +2 -2
- {lionagi-0.17.5.dist-info → lionagi-0.17.7.dist-info}/RECORD +36 -40
- lionagi/libs/file/_utils.py +0 -10
- lionagi/libs/file/concat.py +0 -121
- lionagi/libs/file/concat_files.py +0 -85
- lionagi/libs/file/file_ops.py +0 -118
- lionagi/libs/file/save.py +0 -103
- lionagi/ln/concurrency/throttle.py +0 -83
- lionagi/settings.py +0 -71
- {lionagi-0.17.5.dist-info → lionagi-0.17.7.dist-info}/WHEEL +0 -0
- {lionagi-0.17.5.dist-info → lionagi-0.17.7.dist-info}/licenses/LICENSE +0 -0
lionagi/__init__.py
CHANGED
@@ -5,17 +5,20 @@
 import logging
 from typing import TYPE_CHECKING
 
-from . import ln as ln
+from . import ln as ln
 from .version import __version__
 
 if TYPE_CHECKING:
-    # Type hints only - not imported at runtime
     from pydantic import BaseModel, Field
 
+    from . import _types as types
+    from .operations.builder import OperationGraphBuilder as Builder
     from .operations.node import Operation
+    from .protocols.action.manager import load_mcp_tools
     from .service.imodel import iModel
     from .session.session import Branch, Session
 
+
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
lionagi/config.py
CHANGED
@@ -83,6 +83,17 @@ class AppSettings(BaseSettings, frozen=True):
     LIONAGI_QDRANT_URL: str = "http://localhost:6333"
     LIONAGI_DEFAULT_QDRANT_COLLECTION: str = "event_logs"
 
+    # Log configuration
+    LOG_PERSIST_DIR: str = "./data/logs"
+    LOG_SUBFOLDER: str | None = None
+    LOG_CAPACITY: int = 50
+    LOG_EXTENSION: str = ".json"
+    LOG_USE_TIMESTAMP: bool = True
+    LOG_HASH_DIGITS: int = 5
+    LOG_FILE_PREFIX: str = "log"
+    LOG_AUTO_SAVE_ON_EXIT: bool = True
+    LOG_CLEAR_AFTER_DUMP: bool = True
+
     # Class variable to store the singleton instance
     _instance: ClassVar[Any] = None
 
@@ -119,6 +130,21 @@ class AppSettings(BaseSettings, frozen=True):
 
         return str(secret)
 
+    @property
+    def LOG_CONFIG(self) -> dict[str, Any]:
+        """Get LOG configuration dict compatible with old Settings.Config.LOG format."""
+        return {
+            "persist_dir": self.LOG_PERSIST_DIR,
+            "subfolder": self.LOG_SUBFOLDER,
+            "capacity": self.LOG_CAPACITY,
+            "extension": self.LOG_EXTENSION,
+            "use_timestamp": self.LOG_USE_TIMESTAMP,
+            "hash_digits": self.LOG_HASH_DIGITS,
+            "file_prefix": self.LOG_FILE_PREFIX,
+            "auto_save_on_exit": self.LOG_AUTO_SAVE_ON_EXIT,
+            "clear_after_dump": self.LOG_CLEAR_AFTER_DUMP,
+        }
+
 
 # Create a singleton instance
 settings = AppSettings()
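A minimal sketch of reading the new log settings through the LOG_CONFIG property added above. The environment-override behavior is an assumption based on how pydantic BaseSettings normally works, not something this diff shows.

from lionagi.config import settings

# LOG_CONFIG mirrors the old Settings.Config.LOG dict shape
log_cfg = settings.LOG_CONFIG
print(log_cfg["persist_dir"])  # "./data/logs" by default
print(log_cfg["capacity"])     # 50 by default

# Presumably overridable via environment variables (e.g. LOG_CAPACITY=200)
# before the singleton is created, since AppSettings is a frozen BaseSettings.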
lionagi/fields/action.py
CHANGED
@@ -11,8 +11,8 @@ from lionagi.libs.validate.common_field_validators import (
     validate_boolean_field,
     validate_nullable_string_field,
 )
+from lionagi.ln import extract_json, to_dict, to_list
 from lionagi.models import FieldModel, HashableModel
-from lionagi.utils import to_dict, to_json, to_list
 
 __all__ = (
     "ActionRequestModel",
@@ -27,11 +27,13 @@ def parse_action_request(content: str | dict) -> list[dict]:
         json_blocks = [content.model_dump()]
 
     elif isinstance(content, str):
-        json_blocks =
+        json_blocks = extract_json(content, fuzzy_parse=True)
         if not json_blocks:
             pattern2 = r"```python\s*(.*?)\s*```"
             _d = re.findall(pattern2, content, re.DOTALL)
-            json_blocks = [
+            json_blocks = [
+                extract_json(match, fuzzy_parse=True) for match in _d
+            ]
             json_blocks = to_list(json_blocks, dropna=True)
 
     print(json_blocks)
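Illustrative only: a sketch of the extract_json call that now backs parse_action_request, using the same fuzzy_parse=True flag as the hunk above. The exact return shape (single dict vs. list of dicts) is not asserted here.

from lionagi.ln import extract_json

raw = '```json\n{"function": "search", "arguments": {"query": "lionagi"}}\n```'

# Pulls JSON out of fenced blocks and tolerates minor formatting issues
blocks = extract_json(raw, fuzzy_parse=True)
print(blocks)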
lionagi/libs/file/chunk.py
CHANGED
@@ -28,12 +28,6 @@ def chunk_by_chars(
 
     Raises:
         ValueError: If an error occurs during the chunking process.
-
-    Examples:
-        >>> text = "This is a sample text for chunking."
-        >>> chunks = chunk_by_chars(text, chunk_size=10, overlap=0.2)
-        >>> print(chunks)
-        ['This is a ', 'a sample ', 'le text fo', 'for chunki', 'king.']
     """
     try:
         n_chunks = math.ceil(len(text) / chunk_size)
@@ -112,12 +106,6 @@ def chunk_by_tokens(
 
     Raises:
         ValueError: If an error occurs during the chunking process.
-
-    Examples:
-        >>> tokens = ["This", "is", "a", "sample", "text", "for", "chunking."]
-        >>> chunks = chunk_by_tokens(tokens, chunk_size=3, overlap=0.2)
-        >>> print(chunks)
-        ['This is a', 'a sample text', 'text for chunking.']
     """
     try:
         n_chunks = math.ceil(len(tokens) / chunk_size)
@@ -166,7 +154,9 @@ def _chunk_token_two_parts(
 ) -> list[str | list[str]]:
     """Handle chunking for two parts."""
     chunks = [tokens[: chunk_size + overlap_size]]
-
+    # When residue == 0, we have perfect division, create the second chunk
+    # When residue > threshold, the leftover is big enough for a second chunk
+    if residue == 0 or residue > threshold:
         chunks.append(tokens[chunk_size - overlap_size :])
     else:
         return _process_single_chunk(tokens, return_tokens)
@@ -237,7 +227,6 @@ def chunk_content(
         metadata (Dict[str, Any]): Metadata to be included with each chunk.
         kwargs for tokenizer, if needed.
 
-
     Returns:
         List[Dict[str, Any]]: A list of dictionaries, each representing a chunk with metadata.
     """
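The doctest-style examples removed above still describe the intended behavior; they are reproduced here as a runnable sketch, with the outputs the old docstrings documented shown approximately.

from lionagi.libs.file.chunk import chunk_by_chars, chunk_by_tokens

text = "This is a sample text for chunking."
char_chunks = chunk_by_chars(text, chunk_size=10, overlap=0.2)
# per the removed docstring, roughly:
# ['This is a ', 'a sample ', 'le text fo', 'for chunki', 'king.']

tokens = ["This", "is", "a", "sample", "text", "for", "chunking."]
token_chunks = chunk_by_tokens(tokens, chunk_size=3, overlap=0.2)
# per the removed docstring, roughly:
# ['This is a', 'a sample text', 'text for chunking.']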
lionagi/libs/file/process.py
CHANGED
@@ -9,12 +9,9 @@ from pathlib import Path
 from typing import Any, Literal
 
 from lionagi import ln
+from lionagi.utils import is_import_installed
 
-from ._utils import check_docling_available
 from .chunk import chunk_content
-from .save import save_chunks
-
-_HAS_DOCLING = check_docling_available()
 
 
 def dir_to_files(
@@ -90,82 +87,6 @@ def dir_to_files(
         raise ValueError(f"Error processing directory {directory}: {e}") from e
 
 
-def file_to_chunks(
-    file_path: str | Path,
-    chunk_by: Literal["chars", "tokens"] = "chars",
-    chunk_size: int = 1500,
-    overlap: float = 0.1,
-    threshold: int = 200,
-    encoding: str = "utf-8",
-    custom_metadata: dict[str, Any] | None = None,
-    output_dir: str | Path | None = None,
-    verbose: bool = False,
-    timestamp: bool = True,
-    random_hash_digits: int = 4,
-    as_node: bool = False,
-) -> list[dict[str, Any]]:
-    """
-    Process a file and split its content into chunks.
-
-    This function reads a file, splits its content into chunks using the provided
-    chunking function, and optionally saves the chunks to separate files.
-
-    Args:
-        file_path (Union[str, Path]): Path to the file to be processed.
-        chunk_func (Callable): Function to use for chunking the content.
-        chunk_size (int): The target size for each chunk.
-        overlap (float): The fraction of overlap between chunks.
-        threshold (int): The minimum size for the last chunk.
-        encoding (str): File encoding to use when reading the file.
-        custom_metadata (Optional[Dict[str, Any]]): Additional metadata to include with each chunk.
-        output_dir (Optional[Union[str, Path]]): Directory to save output chunks (if provided).
-        verbose (bool): If True, print verbose output.
-        timestamp (bool): If True, include timestamp in output filenames.
-        random_hash_digits (int): Number of random hash digits to include in output filenames.
-
-    Returns:
-        List[Dict[str, Any]]: A list of dictionaries, each representing a chunk with metadata.
-
-    Raises:
-        ValueError: If there's an error processing the file.
-    """
-    try:
-        if isinstance(file_path, str):
-            file_path = Path(file_path)
-
-        text = file_path.read_text(encoding=encoding)
-
-        metadata = {
-            "file_path": str(file_path),
-            "file_name": file_path.name,
-            "file_size": file_path.stat().st_size,
-            **(custom_metadata or {}),
-        }
-
-        chunks = chunk_content(
-            text,
-            chunk_by=chunk_by,
-            chunk_size=chunk_size,
-            overlap=overlap,
-            threshold=threshold,
-            metadata=metadata,
-            as_node=as_node,
-        )
-
-        if output_dir:
-            save_chunks(
-                chunks=chunks,
-                output_dir=output_dir,
-                verbose=verbose,
-                timestamp=timestamp,
-                random_hash_digits=random_hash_digits,
-            )
-
-        return chunks
-    except Exception as e:
-        raise ValueError(f"Error processing file {file_path}: {e}") from e
-
-
 def chunk(
     *,
     text: str | None = None,
@@ -209,12 +130,12 @@ def chunk(
         reader_tool = lambda x: Path(x).read_text(encoding="utf-8")
 
     if reader_tool == "docling":
-        if
-            raise
-
-
-
-
+        if not is_import_installed("docling"):
+            raise ImportError(
+                "The 'docling' package is required for this feature. "
+                "Please install it via 'pip install lionagi[reader]'."
+            )
+        from docling.document_converter import DocumentConverter
 
         converter = DocumentConverter()
         reader_tool = lambda x: converter.convert(
@@ -235,7 +156,7 @@ def chunk(
         threshold=threshold,
         metadata=metadata,
         as_node=True,
-
+        output_flatten=True,
        tokenizer=tokenizer or str.split,
     )
     if threshold:
@@ -248,15 +169,12 @@ def chunk(
        if output_file.suffix == ".csv":
            p = Pile(chunks)
            p.dump(output_file, "csv")
-
-        if output_file.suffix == "json":
+        elif output_file.suffix == ".json":
            p = Pile(chunks)
            p.dump(output_file, "json")
-
-        if output_file.suffix == ".parquet":
+        elif output_file.suffix == ".parquet":
            p = Pile(chunks)
            p.dump(output_file, "parquet")
-
        else:
            raise ValueError(f"Unsupported output file format: {output_file}")
 
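Illustrative sketch of the chunk() entry point after these changes. Only text, threshold, tokenizer, and output_file are visible in the hunks above; chunk_size and overlap are assumed parameter names, so treat this as a sketch rather than the package's documented API.

from pathlib import Path

from lionagi.libs.file.process import chunk

nodes = chunk(
    text=Path("README.md").read_text(),
    chunk_size=800,      # assumed name
    overlap=0.1,         # assumed name
    threshold=100,
    output_file=Path("./data/chunks.json"),  # .csv / .json / .parquet dump via Pile
)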
lionagi/libs/schema/breakdown_pydantic_annotation.py
ADDED
@@ -0,0 +1,45 @@
+from inspect import isclass
+from typing import Any, get_args, get_origin
+
+from pydantic import BaseModel
+
+
+def breakdown_pydantic_annotation(
+    model: type[BaseModel],
+    max_depth: int | None = None,
+    current_depth: int = 0,
+) -> dict[str, Any]:
+    if not _is_pydantic_model(model):
+        raise TypeError("Input must be a Pydantic model")
+
+    if max_depth is not None and current_depth >= max_depth:
+        raise RecursionError("Maximum recursion depth reached")
+
+    out: dict[str, Any] = {}
+    for k, v in model.__annotations__.items():
+        origin = get_origin(v)
+        if _is_pydantic_model(v):
+            out[k] = breakdown_pydantic_annotation(
+                v, max_depth, current_depth + 1
+            )
+        elif origin is list:
+            args = get_args(v)
+            if args and _is_pydantic_model(args[0]):
+                out[k] = [
+                    breakdown_pydantic_annotation(
+                        args[0], max_depth, current_depth + 1
+                    )
+                ]
+            else:
+                out[k] = [args[0] if args else Any]
+        else:
+            out[k] = v
+
+    return out
+
+
+def _is_pydantic_model(x: Any) -> bool:
+    try:
+        return isclass(x) and issubclass(x, BaseModel)
+    except TypeError:
+        return False
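A runnable sketch of the new helper with a nested model; the expected output follows directly from the function body above.

from pydantic import BaseModel

from lionagi.libs.schema.breakdown_pydantic_annotation import (
    breakdown_pydantic_annotation,
)


class Address(BaseModel):
    city: str
    zip_code: str


class User(BaseModel):
    name: str
    addresses: list[Address]


# Nested models are expanded recursively; list[Model] becomes a one-element
# list holding the expanded model.
print(breakdown_pydantic_annotation(User))
# {'name': <class 'str'>, 'addresses': [{'city': <class 'str'>, 'zip_code': <class 'str'>}]}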
lionagi/ln/_async_call.py
CHANGED
@@ -43,7 +43,7 @@ async def alcall(
     output_unique: bool = False,
     output_flatten_tuple_set: bool = False,
     delay_before_start: float = 0,
-
+    retry_initial_delay: float = 0,
     retry_backoff: float = 1,
     retry_default: Any = Unset,
     retry_timeout: float = None,
@@ -143,7 +143,7 @@ async def alcall(
 
     async def execute_task(i: Any, index: int) -> Any:
         attempts = 0
-        current_delay =
+        current_delay = retry_initial_delay
         while True:
             try:
                 result = await call_func(i)
@@ -223,10 +223,10 @@ async def bcall(
     output_unique: bool = False,
     output_flatten_tuple_set: bool = False,
     delay_before_start: float = 0,
-
+    retry_initial_delay: float = 0,
     retry_backoff: float = 1,
     retry_default: Any = Unset,
-    retry_timeout: float =
+    retry_timeout: float = None,
     retry_attempts: int = 0,
     max_concurrent: int | None = None,
     throttle_period: float | None = None,
@@ -248,7 +248,7 @@ async def bcall(
         output_unique=output_unique,
         output_flatten_tuple_set=output_flatten_tuple_set,
         delay_before_start=delay_before_start,
-
+        retry_initial_delay=retry_initial_delay,
         retry_backoff=retry_backoff,
         retry_default=retry_default,
         retry_timeout=retry_timeout,
@@ -279,7 +279,7 @@ class AlcallParams(Params):
 
     # retry and timeout
     delay_before_start: float
-
+    retry_initial_delay: float
     retry_backoff: float
     retry_default: Any
     retry_timeout: float
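A minimal sketch of the renamed retry knobs in alcall, assuming the (inputs, func) positional order and asyncio as the running loop; neither is shown by this diff.

import asyncio

from lionagi.ln._async_call import alcall


async def work(i: int) -> int:
    # stand-in workload; any awaitable callable should do
    return i * 2


async def main() -> None:
    # retry_initial_delay is the wait before the first retry;
    # retry_backoff multiplies the delay after each failed attempt.
    results = await alcall(
        [1, 2, 3],
        work,
        retry_attempts=2,
        retry_initial_delay=0.5,
        retry_backoff=2,
        max_concurrent=2,
    )
    print(results)  # expected: [2, 4, 6]


asyncio.run(main())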
lionagi/ln/fuzzy/_fuzzy_match.py
CHANGED
@@ -1,8 +1,7 @@
-from collections.abc import Sequence
 from dataclasses import dataclass
 from typing import Any, ClassVar, Literal
 
-from ..types import
+from ..types import KeysLike, Params, Unset
 from ._string_similarity import (
     SIMILARITY_ALGO_MAP,
     SIMILARITY_TYPE,
@@ -21,7 +20,7 @@ HandleUnmatched = Literal["ignore", "raise", "remove", "fill", "force"]
 
 def fuzzy_match_keys(
     d_: dict[str, Any],
-    keys:
+    keys: KeysLike,
     /,
     *,
     similarity_algo: SIMILARITY_TYPE | SimilarityFunc = "jaro_winkler",
@@ -166,7 +165,5 @@ class FuzzyMatchKeysParams(Params):
     fill_mapping: dict[str, Any] | Any = Unset
     strict: bool = False
 
-    def __call__(
-        self, d_: dict[str, Any], keys: Sequence[str] | KeysDict
-    ) -> dict[str, Any]:
+    def __call__(self, d_: dict[str, Any], keys: KeysLike) -> dict[str, Any]:
         return fuzzy_match_keys(d_, keys, **self.default_kw())
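A sketch of fuzzy_match_keys with the new KeysLike parameter; the corrected output is an expectation under the default jaro_winkler matching, not a verified result.

from lionagi.ln.fuzzy._fuzzy_match import fuzzy_match_keys

data = {"user_nmae": "Ada", "agee": 36}
expected = ["user_name", "age"]  # KeysLike: a plain sequence of key names

fixed = fuzzy_match_keys(data, expected)
print(fixed)  # expected, roughly: {"user_name": "Ada", "age": 36}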
lionagi/ln/fuzzy/_fuzzy_validate.py
CHANGED
@@ -1,9 +1,9 @@
-from collections.abc import Callable
+from collections.abc import Callable
 from typing import TYPE_CHECKING, Any, Literal
 
 from lionagi._errors import ValidationError
 
-from ..types import
+from ..types import KeysLike
 from ._extract_json import extract_json
 from ._fuzzy_match import FuzzyMatchKeysParams, fuzzy_match_keys
 from ._string_similarity import SIMILARITY_TYPE
@@ -56,7 +56,7 @@ def fuzzy_validate_pydantic(
 
 def fuzzy_validate_mapping(
     d: Any,
-    keys:
+    keys: KeysLike,
     /,
     *,
     similarity_algo: (
@@ -110,7 +110,6 @@ def fuzzy_validate_mapping(
     # Try converting to dictionary
     try:
         if isinstance(d, str):
-            # First try to_json for JSON strings and code blocks
             try:
                 json_result = extract_json(
                     d, fuzzy_parse=True, return_one_if_single=True
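Illustrative sketch of fuzzy_validate_mapping on a fenced JSON string, which per the hunk above goes through extract_json before key matching; the printed result is an expectation, not a verified output.

from lionagi.ln.fuzzy._fuzzy_validate import fuzzy_validate_mapping

raw = '```json\n{"titel": "Report", "page_count": 12}\n```'
keys = ["title", "page_count"]  # KeysLike: expected key names

out = fuzzy_validate_mapping(raw, keys)
print(out)  # expected, roughly: {"title": "Report", "page_count": 12}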
lionagi/ln/fuzzy/_string_similarity.py
CHANGED
@@ -2,11 +2,13 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from collections.abc import Callable
+from collections.abc import Callable
 from dataclasses import dataclass
-from
-
-
+from typing import TYPE_CHECKING, Literal
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
 
 __all__ = ("string_similarity",)
 
@@ -157,6 +159,8 @@ def levenshtein_distance(a: str, b: str) -> int:
         int: Minimum number of single-character edits needed to change one
             string into the other
     """
+    from itertools import product
+
     if not a:
         return len(b)
     if not b:
@@ -213,6 +217,8 @@ def sequence_matcher_similarity(s1: str, s2: str) -> float:
     Returns:
         float: Similarity score between 0 and 1
     """
+    from difflib import SequenceMatcher
+
     return SequenceMatcher(None, s1, s2).ratio()
 
 
@@ -249,7 +255,7 @@ class MatchResult:
 
 def string_similarity(
     word: str,
-    correct_words: Sequence[str],
+    correct_words: "Sequence[str]",
     algorithm: SIMILARITY_TYPE | Callable[[str, str], float] = "jaro_winkler",
     threshold: float = 0.0,
     case_sensitive: bool = False,
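A small check of the two helpers whose imports moved inside the function bodies above; the values follow from the standard Levenshtein definition and difflib's SequenceMatcher ratio.

from lionagi.ln.fuzzy._string_similarity import (
    levenshtein_distance,
    sequence_matcher_similarity,
)

print(levenshtein_distance("kitten", "sitting"))                  # 3
print(round(sequence_matcher_similarity("color", "colour"), 2))   # ~0.91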
lionagi/ln/fuzzy/_to_dict.py
CHANGED
@@ -9,10 +9,6 @@ from typing import Any, Literal
 
 from ._fuzzy_json import fuzzy_json
 
-# ----------------------------
-# Helpers (small, tight, local)
-# ----------------------------
-
 
 def _is_na(obj: Any) -> bool:
     """None / Pydantic undefined sentinels -> treat as NA."""
@@ -67,7 +63,7 @@ def _parse_str(
 def _object_to_mapping_like(
     obj: Any,
     *,
-
+    prioritize_model_dump: bool = True,
     **kwargs: Any,
 ) -> Mapping | dict | Any:
     """
@@ -80,11 +76,11 @@ def _object_to_mapping_like(
     5) dict(obj)
     """
     # 1) Pydantic v2
-    if
+    if prioritize_model_dump and hasattr(obj, "model_dump"):
         return obj.model_dump(**kwargs)
 
     # 2) Common methods
-    for name in ("to_dict", "dict", "to_json", "json"):
+    for name in ("to_dict", "dict", "to_json", "json", "model_dump"):
         if hasattr(obj, name):
             res = getattr(obj, name)(**kwargs)
             return json.loads(res) if isinstance(res, str) else res
@@ -118,7 +114,7 @@ def _preprocess_recursive(
     max_depth: int,
     recursive_custom_types: bool,
     str_parse_opts: dict[str, Any],
-
+    prioritize_model_dump: bool,
 ) -> Any:
     """
     Recursively process nested structures:
@@ -145,7 +141,7 @@ def _preprocess_recursive(
             max_depth=max_depth,
             recursive_custom_types=recursive_custom_types,
             str_parse_opts=str_parse_opts,
-
+            prioritize_model_dump=prioritize_model_dump,
         )
 
     # Dict-like
@@ -158,7 +154,7 @@ def _preprocess_recursive(
                 max_depth=max_depth,
                 recursive_custom_types=recursive_custom_types,
                 str_parse_opts=str_parse_opts,
-
+                prioritize_model_dump=prioritize_model_dump,
             )
             for k, v in obj.items()
         }
@@ -172,7 +168,7 @@ def _preprocess_recursive(
                 max_depth=max_depth,
                 recursive_custom_types=recursive_custom_types,
                 str_parse_opts=str_parse_opts,
-
+                prioritize_model_dump=prioritize_model_dump,
             )
             for v in obj
         ]
@@ -198,7 +194,7 @@ def _preprocess_recursive(
                 max_depth=max_depth,
                 recursive_custom_types=recursive_custom_types,
                 str_parse_opts=str_parse_opts,
-
+                prioritize_model_dump=prioritize_model_dump,
             )
         except Exception:
             return obj
@@ -207,7 +203,7 @@ def _preprocess_recursive(
     if recursive_custom_types:
         with contextlib.suppress(Exception):
             mapped = _object_to_mapping_like(
-                obj,
+                obj, prioritize_model_dump=prioritize_model_dump
             )
             return _preprocess_recursive(
                 mapped,
@@ -215,7 +211,7 @@ def _preprocess_recursive(
                 max_depth=max_depth,
                 recursive_custom_types=recursive_custom_types,
                 str_parse_opts=str_parse_opts,
-
+                prioritize_model_dump=prioritize_model_dump,
             )
 
     return obj
@@ -232,7 +228,7 @@ def _convert_top_level_to_dict(
     fuzzy_parse: bool,
     str_type: Literal["json", "xml"] | None,
     parser: Callable[[str], Any] | None,
-
+    prioritize_model_dump: bool,
     use_enum_values: bool,
     **kwargs: Any,
 ) -> dict[str, Any]:
@@ -273,7 +269,7 @@ def _convert_top_level_to_dict(
     # faithfully following your previous "non-Sequence -> model path" behavior.
     if not isinstance(obj, Sequence):
         converted = _object_to_mapping_like(
-            obj,
+            obj, prioritize_model_dump=prioritize_model_dump, **kwargs
         )
         # If conversion returned a string, try to parse JSON to mapping; else pass-through
         if isinstance(converted, str):
@@ -321,7 +317,7 @@ def to_dict(
     input_: Any,
     /,
     *,
-
+    prioritize_model_dump: bool = True,
     fuzzy_parse: bool = False,
     suppress: bool = False,
     str_type: Literal["json", "xml"] | None = "json",
@@ -330,12 +326,16 @@ def to_dict(
     max_recursive_depth: int | None = None,
     recursive_python_only: bool = True,
     use_enum_values: bool = False,
+    use_model_dump: bool | None = None,  # deprecated
     **kwargs: Any,
 ) -> dict[str, Any]:
     """
     Convert various input types to a dictionary, with optional recursive processing.
     Semantics preserved from original implementation.
     """
+    if use_model_dump is not None:
+        prioritize_model_dump = use_model_dump
+
     try:
         # Clamp recursion depth (match your constraints)
         if not isinstance(max_recursive_depth, int):
@@ -368,7 +368,7 @@ def to_dict(
             max_depth=max_depth,
             recursive_custom_types=not recursive_python_only,
             str_parse_opts=str_parse_opts,
-
+            prioritize_model_dump=prioritize_model_dump,
         )
 
     # Final top-level conversion
@@ -377,7 +377,7 @@ def to_dict(
             fuzzy_parse=fuzzy_parse,
             str_type=str_type,
             parser=parser,
-
+            prioritize_model_dump=prioritize_model_dump,
             use_enum_values=use_enum_values,
             **kwargs,
         )
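A short sketch of the renamed keyword and its deprecated alias in to_dict; the import path through lionagi.ln matches the one used in fields/action.py above.

from pydantic import BaseModel

from lionagi.ln import to_dict


class User(BaseModel):
    name: str
    age: int


u = User(name="Ada", age=36)

# New keyword name: prefer model_dump() for Pydantic objects.
print(to_dict(u, prioritize_model_dump=True))  # {'name': 'Ada', 'age': 36}

# Deprecated alias still honored: when use_model_dump is given,
# it overrides prioritize_model_dump (see the shim in the hunk above).
print(to_dict(u, use_model_dump=False))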
lionagi/ln/types.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from collections.abc import Sequence
 from dataclasses import dataclass, field
 from enum import Enum as _Enum
 from typing import Any, ClassVar, Final, Literal, TypeVar, Union
@@ -22,6 +23,7 @@ __all__ = (
     "not_sentinel",
     "Params",
     "DataClass",
+    "KeysLike",
 )
 
 T = TypeVar("T")
@@ -236,6 +238,16 @@ class Params:
             data[k] = v
         return data
 
+    def __hash__(self) -> int:
+        from ._hash import hash_dict
+
+        return hash_dict(self.to_dict())
+
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, Params):
+            return False
+        return hash(self) == hash(other)
+
 
 @dataclass(slots=True)
 class DataClass:
@@ -297,3 +309,6 @@ class DataClass:
         if value is None and cls._none_as_sentinel:
             return True
         return is_sentinel(value)
+
+
+KeysLike = Sequence[str] | KeysDict
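A small sketch of the new KeysLike alias; since KeysLike = Sequence[str] | KeysDict, a function annotated with it can take either a list of key names or a {key: type} mapping. (Separately, the Params.__hash__/__eq__ addition above makes parameter objects with identical to_dict() contents compare equal.)

from lionagi.ln.types import KeysLike


def normalize_keys(keys: KeysLike) -> list[str]:
    # list() yields the key names for both a sequence and a mapping
    return list(keys)


print(normalize_keys(["title", "age"]))            # ['title', 'age']
print(normalize_keys({"title": str, "age": int}))  # ['title', 'age']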
|