lionagi 0.17.4__py3-none-any.whl → 0.17.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/__init__.py +45 -7
- lionagi/config.py +26 -0
- lionagi/fields/action.py +5 -3
- lionagi/libs/file/chunk.py +3 -14
- lionagi/libs/file/process.py +10 -92
- lionagi/libs/schema/breakdown_pydantic_annotation.py +45 -0
- lionagi/ln/_async_call.py +19 -8
- lionagi/ln/_hash.py +12 -2
- lionagi/ln/_to_list.py +23 -12
- lionagi/ln/fuzzy/_fuzzy_match.py +3 -6
- lionagi/ln/fuzzy/_fuzzy_validate.py +9 -8
- lionagi/ln/fuzzy/_string_similarity.py +11 -5
- lionagi/ln/fuzzy/_to_dict.py +19 -19
- lionagi/ln/types.py +15 -0
- lionagi/operations/operate/operate.py +7 -11
- lionagi/operations/parse/parse.py +5 -3
- lionagi/protocols/generic/element.py +3 -6
- lionagi/protocols/generic/event.py +1 -1
- lionagi/protocols/mail/package.py +2 -2
- lionagi/protocols/messages/instruction.py +9 -1
- lionagi/protocols/operatives/operative.py +4 -3
- lionagi/service/broadcaster.py +61 -0
- lionagi/service/connections/api_calling.py +22 -145
- lionagi/service/connections/mcp/wrapper.py +8 -15
- lionagi/service/hooks/__init__.py +2 -10
- lionagi/service/hooks/_types.py +1 -0
- lionagi/service/hooks/hooked_event.py +142 -0
- lionagi/service/imodel.py +2 -2
- lionagi/session/branch.py +46 -169
- lionagi/session/session.py +1 -44
- lionagi/tools/file/reader.py +6 -4
- lionagi/utils.py +3 -342
- lionagi/version.py +1 -1
- {lionagi-0.17.4.dist-info → lionagi-0.17.6.dist-info}/METADATA +4 -4
- {lionagi-0.17.4.dist-info → lionagi-0.17.6.dist-info}/RECORD +37 -41
- lionagi/libs/file/_utils.py +0 -10
- lionagi/libs/file/concat.py +0 -121
- lionagi/libs/file/concat_files.py +0 -85
- lionagi/libs/file/file_ops.py +0 -118
- lionagi/libs/file/save.py +0 -103
- lionagi/ln/concurrency/throttle.py +0 -83
- lionagi/settings.py +0 -71
- {lionagi-0.17.4.dist-info → lionagi-0.17.6.dist-info}/WHEEL +0 -0
- {lionagi-0.17.4.dist-info → lionagi-0.17.6.dist-info}/licenses/LICENSE +0 -0
lionagi/__init__.py
CHANGED
@@ -3,15 +3,19 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import logging
+from typing import TYPE_CHECKING
 
-from pydantic import BaseModel, Field
-
-from . import ln as ln
-from .operations.node import Operation
-from .service.imodel import iModel
-from .session.session import Branch, Session
+from . import ln as ln  # Lightweight concurrency utilities
 from .version import __version__
 
+if TYPE_CHECKING:
+    # Type hints only - not imported at runtime
+    from pydantic import BaseModel, Field
+
+    from .operations.node import Operation
+    from .service.imodel import iModel
+    from .session.session import Branch, Session
+
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
@@ -24,7 +28,40 @@ def __getattr__(name: str):
     if name in _lazy_imports:
         return _lazy_imports[name]
 
-    if name == "types":
+    # Lazy load core components
+    if name == "Session":
+        from .session.session import Session
+
+        _lazy_imports[name] = Session
+        return Session
+    elif name == "Branch":
+        from .session.session import Branch
+
+        _lazy_imports[name] = Branch
+        return Branch
+    # Lazy load Pydantic components
+    elif name == "BaseModel":
+        from pydantic import BaseModel
+
+        _lazy_imports[name] = BaseModel
+        return BaseModel
+    elif name == "Field":
+        from pydantic import Field
+
+        _lazy_imports[name] = Field
+        return Field
+    # Lazy load operations
+    elif name == "Operation":
+        from .operations.node import Operation
+
+        _lazy_imports[name] = Operation
+        return Operation
+    elif name == "iModel":
+        from .service.imodel import iModel
+
+        _lazy_imports[name] = iModel
+        return iModel
+    elif name == "types":
         from . import _types as types
 
         _lazy_imports["types"] = types
@@ -54,5 +91,6 @@ __all__ = (
     "logger",
     "Builder",
     "Operation",
+    "load_mcp_tools",
    "ln",
 )
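The rewritten `__init__.py` moves its heavy imports behind a module-level `__getattr__` (PEP 562), so `import lionagi` no longer pulls in pydantic or the session machinery until an attribute is first accessed. A minimal, self-contained sketch of the same pattern (illustrative module and names, not lionagi code):

```python
# lazy_pkg.py - illustrative module showing the PEP 562 lazy-import idiom
_lazy_imports: dict = {}


def __getattr__(name: str):
    # Called only when `name` is not found as a regular module attribute.
    if name in _lazy_imports:
        return _lazy_imports[name]
    if name == "BaseModel":
        from pydantic import BaseModel  # heavy import deferred to first use

        _lazy_imports[name] = BaseModel
        return BaseModel
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```

`import lazy_pkg` stays cheap; the cost is paid on the first `lazy_pkg.BaseModel` lookup, and the cached entry makes later lookups ordinary dict hits.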
lionagi/config.py
CHANGED
@@ -83,6 +83,17 @@ class AppSettings(BaseSettings, frozen=True):
     LIONAGI_QDRANT_URL: str = "http://localhost:6333"
     LIONAGI_DEFAULT_QDRANT_COLLECTION: str = "event_logs"
 
+    # Log configuration
+    LOG_PERSIST_DIR: str = "./data/logs"
+    LOG_SUBFOLDER: str | None = None
+    LOG_CAPACITY: int = 50
+    LOG_EXTENSION: str = ".json"
+    LOG_USE_TIMESTAMP: bool = True
+    LOG_HASH_DIGITS: int = 5
+    LOG_FILE_PREFIX: str = "log"
+    LOG_AUTO_SAVE_ON_EXIT: bool = True
+    LOG_CLEAR_AFTER_DUMP: bool = True
+
     # Class variable to store the singleton instance
     _instance: ClassVar[Any] = None
 
@@ -119,6 +130,21 @@ class AppSettings(BaseSettings, frozen=True):
 
         return str(secret)
 
+    @property
+    def LOG_CONFIG(self) -> dict[str, Any]:
+        """Get LOG configuration dict compatible with old Settings.Config.LOG format."""
+        return {
+            "persist_dir": self.LOG_PERSIST_DIR,
+            "subfolder": self.LOG_SUBFOLDER,
+            "capacity": self.LOG_CAPACITY,
+            "extension": self.LOG_EXTENSION,
+            "use_timestamp": self.LOG_USE_TIMESTAMP,
+            "hash_digits": self.LOG_HASH_DIGITS,
+            "file_prefix": self.LOG_FILE_PREFIX,
+            "auto_save_on_exit": self.LOG_AUTO_SAVE_ON_EXIT,
+            "clear_after_dump": self.LOG_CLEAR_AFTER_DUMP,
+        }
+
 
 # Create a singleton instance
 settings = AppSettings()
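`AppSettings` is a `pydantic-settings` `BaseSettings` subclass (per the hunk header), so the new `LOG_*` fields can typically be overridden through environment variables, subject to the model's settings config, which is not shown in this diff. The `LOG_CONFIG` property repackages them in the dict shape the old `Settings.Config.LOG` consumers expect. A small usage sketch based only on what the diff shows:

```python
from lionagi.config import settings

# Defaults from the diff unless overridden in the environment
print(settings.LOG_CAPACITY)               # 50
print(settings.LOG_CONFIG["persist_dir"])  # "./data/logs"
```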
lionagi/fields/action.py
CHANGED
@@ -11,8 +11,8 @@ from lionagi.libs.validate.common_field_validators import (
     validate_boolean_field,
     validate_nullable_string_field,
 )
+from lionagi.ln import extract_json, to_dict, to_list
 from lionagi.models import FieldModel, HashableModel
-from lionagi.utils import to_dict, to_json, to_list
 
 __all__ = (
     "ActionRequestModel",
@@ -27,11 +27,13 @@ def parse_action_request(content: str | dict) -> list[dict]:
         json_blocks = [content.model_dump()]
 
     elif isinstance(content, str):
-        json_blocks = to_json(content, fuzzy_parse=True)
+        json_blocks = extract_json(content, fuzzy_parse=True)
         if not json_blocks:
            pattern2 = r"```python\s*(.*?)\s*```"
             _d = re.findall(pattern2, content, re.DOTALL)
-            json_blocks = [to_json(match, fuzzy_parse=True) for match in _d]
+            json_blocks = [
+                extract_json(match, fuzzy_parse=True) for match in _d
+            ]
         json_blocks = to_list(json_blocks, dropna=True)
 
     print(json_blocks)
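The parsing here now goes through `extract_json` from `lionagi.ln` instead of the old `to_json` utility. A hedged sketch of the call as used above; the exact return shape of `extract_json` (single object vs. list) is not shown in this diff, so the comment is an assumption:

```python
from lionagi.ln import extract_json

raw = '{"function": "multiply", "arguments": {"a": 3, "b": 4}}'
blocks = extract_json(raw, fuzzy_parse=True)
print(blocks)  # assumed: the parsed dict, or a list of parsed objects found in the text
```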
lionagi/libs/file/chunk.py
CHANGED
@@ -28,12 +28,6 @@ def chunk_by_chars(
 
     Raises:
         ValueError: If an error occurs during the chunking process.
-
-    Examples:
-        >>> text = "This is a sample text for chunking."
-        >>> chunks = chunk_by_chars(text, chunk_size=10, overlap=0.2)
-        >>> print(chunks)
-        ['This is a ', 'a sample ', 'le text fo', 'for chunki', 'king.']
     """
     try:
         n_chunks = math.ceil(len(text) / chunk_size)
@@ -112,12 +106,6 @@ def chunk_by_tokens(
 
     Raises:
         ValueError: If an error occurs during the chunking process.
-
-    Examples:
-        >>> tokens = ["This", "is", "a", "sample", "text", "for", "chunking."]
-        >>> chunks = chunk_by_tokens(tokens, chunk_size=3, overlap=0.2)
-        >>> print(chunks)
-        ['This is a', 'a sample text', 'text for chunking.']
     """
     try:
         n_chunks = math.ceil(len(tokens) / chunk_size)
@@ -166,7 +154,9 @@ def _chunk_token_two_parts(
 ) -> list[str | list[str]]:
     """Handle chunking for two parts."""
     chunks = [tokens[: chunk_size + overlap_size]]
-    if residue > threshold:
+    # When residue == 0, we have perfect division, create the second chunk
+    # When residue > threshold, the leftover is big enough for a second chunk
+    if residue == 0 or residue > threshold:
         chunks.append(tokens[chunk_size - overlap_size :])
     else:
         return _process_single_chunk(tokens, return_tokens)
@@ -237,7 +227,6 @@ def chunk_content(
         metadata (Dict[str, Any]): Metadata to be included with each chunk.
         kwargs for tokenizer, if needed.
 
-
     Returns:
         List[Dict[str, Any]]: A list of dictionaries, each representing a chunk with metadata.
     """
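The behavioral fix in `_chunk_token_two_parts` is the `residue == 0` case: on an exact division the second chunk is now always emitted. A standalone sketch mirroring only the branch shown in the diff (how `residue` is computed upstream is not shown here, so it is passed in by the caller):

```python
def two_part_chunks(tokens, chunk_size, overlap_size, residue, threshold):
    # Mirrors the branch from the diff; `residue` comes from the caller.
    chunks = [tokens[: chunk_size + overlap_size]]
    if residue == 0 or residue > threshold:
        chunks.append(tokens[chunk_size - overlap_size :])
    else:
        chunks = [tokens]  # leftover too small: fall back to a single chunk
    return chunks


# Exact division: 10 tokens, chunk_size 5 -> residue 0.
# The old condition (residue > threshold only) would have dropped the second chunk.
print(two_part_chunks(list(range(10)), 5, 1, residue=0, threshold=2))
# [[0, 1, 2, 3, 4, 5], [4, 5, 6, 7, 8, 9]]
```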
lionagi/libs/file/process.py
CHANGED
@@ -9,12 +9,9 @@ from pathlib import Path
 from typing import Any, Literal
 
 from lionagi import ln
+from lionagi.utils import is_import_installed
 
-from ._utils import check_docling_available
 from .chunk import chunk_content
-from .save import save_chunks
-
-_HAS_DOCLING = check_docling_available()
 
 
 def dir_to_files(
@@ -90,82 +87,6 @@ def dir_to_files(
         raise ValueError(f"Error processing directory {directory}: {e}") from e
 
 
-def file_to_chunks(
-    file_path: str | Path,
-    chunk_by: Literal["chars", "tokens"] = "chars",
-    chunk_size: int = 1500,
-    overlap: float = 0.1,
-    threshold: int = 200,
-    encoding: str = "utf-8",
-    custom_metadata: dict[str, Any] | None = None,
-    output_dir: str | Path | None = None,
-    verbose: bool = False,
-    timestamp: bool = True,
-    random_hash_digits: int = 4,
-    as_node: bool = False,
-) -> list[dict[str, Any]]:
-    """
-    Process a file and split its content into chunks.
-
-    This function reads a file, splits its content into chunks using the provided
-    chunking function, and optionally saves the chunks to separate files.
-
-    Args:
-        file_path (Union[str, Path]): Path to the file to be processed.
-        chunk_func (Callable): Function to use for chunking the content.
-        chunk_size (int): The target size for each chunk.
-        overlap (float): The fraction of overlap between chunks.
-        threshold (int): The minimum size for the last chunk.
-        encoding (str): File encoding to use when reading the file.
-        custom_metadata (Optional[Dict[str, Any]]): Additional metadata to include with each chunk.
-        output_dir (Optional[Union[str, Path]]): Directory to save output chunks (if provided).
-        verbose (bool): If True, print verbose output.
-        timestamp (bool): If True, include timestamp in output filenames.
-        random_hash_digits (int): Number of random hash digits to include in output filenames.
-
-    Returns:
-        List[Dict[str, Any]]: A list of dictionaries, each representing a chunk with metadata.
-
-    Raises:
-        ValueError: If there's an error processing the file.
-    """
-    try:
-        if isinstance(file_path, str):
-            file_path = Path(file_path)
-
-        text = file_path.read_text(encoding=encoding)
-
-        metadata = {
-            "file_path": str(file_path),
-            "file_name": file_path.name,
-            "file_size": file_path.stat().st_size,
-            **(custom_metadata or {}),
-        }
-
-        chunks = chunk_content(
-            text,
-            chunk_by=chunk_by,
-            chunk_size=chunk_size,
-            overlap=overlap,
-            threshold=threshold,
-            metadata=metadata,
-            as_node=as_node,
-        )
-
-        if output_dir:
-            save_chunks(
-                chunks=chunks,
-                output_dir=output_dir,
-                verbose=verbose,
-                timestamp=timestamp,
-                random_hash_digits=random_hash_digits,
-            )
-
-        return chunks
-    except Exception as e:
-        raise ValueError(f"Error processing file {file_path}: {e}") from e
-
-
 def chunk(
     *,
     text: str | None = None,
@@ -209,12 +130,12 @@ def chunk(
         reader_tool = lambda x: Path(x).read_text(encoding="utf-8")
 
     if reader_tool == "docling":
-        if not _HAS_DOCLING:
-            raise
-
-
-
-
+        if not is_import_installed("docling"):
+            raise ImportError(
+                "The 'docling' package is required for this feature. "
+                "Please install it via 'pip install lionagi[reader]'."
+            )
+
         from docling.document_converter import DocumentConverter
 
         converter = DocumentConverter()
         reader_tool = lambda x: converter.convert(
@@ -235,7 +156,7 @@
         threshold=threshold,
         metadata=metadata,
         as_node=True,
-
+        output_flatten=True,
         tokenizer=tokenizer or str.split,
     )
     if threshold:
@@ -248,15 +169,12 @@
     if output_file.suffix == ".csv":
         p = Pile(chunks)
         p.dump(output_file, "csv")
-
-    if output_file.suffix == "json":
+    elif output_file.suffix == ".json":
         p = Pile(chunks)
         p.dump(output_file, "json")
-
-    if output_file.suffix == ".parquet":
+    elif output_file.suffix == ".parquet":
         p = Pile(chunks)
         p.dump(output_file, "parquet")
-
     else:
         raise ValueError(f"Unsupported output file format: {output_file}")
 
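The optional-dependency check now uses `is_import_installed` from `lionagi.utils` instead of the removed `check_docling_available` helper. lionagi's own implementation is not shown in this diff; a generic standard-library equivalent of this kind of check looks like:

```python
from importlib.util import find_spec


def is_import_installed(package_name: str) -> bool:
    # True if the package is importable, without actually importing it.
    return find_spec(package_name) is not None


if not is_import_installed("docling"):
    raise ImportError(
        "The 'docling' package is required for this feature. "
        "Please install it via 'pip install lionagi[reader]'."
    )
```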
lionagi/libs/schema/breakdown_pydantic_annotation.py
ADDED
@@ -0,0 +1,45 @@
+from inspect import isclass
+from typing import Any, get_args, get_origin
+
+from pydantic import BaseModel
+
+
+def breakdown_pydantic_annotation(
+    model: type[BaseModel],
+    max_depth: int | None = None,
+    current_depth: int = 0,
+) -> dict[str, Any]:
+    if not _is_pydantic_model(model):
+        raise TypeError("Input must be a Pydantic model")
+
+    if max_depth is not None and current_depth >= max_depth:
+        raise RecursionError("Maximum recursion depth reached")
+
+    out: dict[str, Any] = {}
+    for k, v in model.__annotations__.items():
+        origin = get_origin(v)
+        if _is_pydantic_model(v):
+            out[k] = breakdown_pydantic_annotation(
+                v, max_depth, current_depth + 1
+            )
+        elif origin is list:
+            args = get_args(v)
+            if args and _is_pydantic_model(args[0]):
+                out[k] = [
+                    breakdown_pydantic_annotation(
+                        args[0], max_depth, current_depth + 1
+                    )
+                ]
+            else:
+                out[k] = [args[0] if args else Any]
+        else:
+            out[k] = v
+
+    return out
+
+
+def _is_pydantic_model(x: Any) -> bool:
+    try:
+        return isclass(x) and issubclass(x, BaseModel)
+    except TypeError:
+        return False
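A usage sketch for the new helper, following the source above (the import path mirrors the file location; a shorter public re-export, if any, is not shown in this diff). Nested models become nested dicts and `list[Model]` becomes a single-element list containing the nested breakdown:

```python
from pydantic import BaseModel

from lionagi.libs.schema.breakdown_pydantic_annotation import (
    breakdown_pydantic_annotation,
)


class Address(BaseModel):
    city: str
    zip_code: str


class User(BaseModel):
    name: str
    age: int
    address: Address
    tags: list[str]
    past_addresses: list[Address]


print(breakdown_pydantic_annotation(User))
# {'name': <class 'str'>, 'age': <class 'int'>,
#  'address': {'city': <class 'str'>, 'zip_code': <class 'str'>},
#  'tags': [<class 'str'>],
#  'past_addresses': [{'city': <class 'str'>, 'zip_code': <class 'str'>}]}
```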
lionagi/ln/_async_call.py
CHANGED
@@ -5,7 +5,6 @@ from typing import Any, ClassVar
 
 import anyio
 import anyio.to_thread
-from pydantic import BaseModel
 
 from ._to_list import to_list
 from .concurrency import Lock as ConcurrencyLock
@@ -18,6 +17,10 @@ from .concurrency import (
 )
 from .types import Params, T, Unset, not_sentinel
 
+_INITIALIZED = False
+_MODEL_LIKE = None
+
+
 __all__ = (
     "alcall",
     "bcall",
@@ -40,7 +43,7 @@ async def alcall(
     output_unique: bool = False,
     output_flatten_tuple_set: bool = False,
     delay_before_start: float = 0,
-
+    retry_initial_delay: float = 0,
     retry_backoff: float = 1,
     retry_default: Any = Unset,
     retry_timeout: float = None,
@@ -54,6 +57,14 @@ async def alcall(
     retries, timeout, and output processing.
     """
 
+    global _INITIALIZED, _MODEL_LIKE
+    if _INITIALIZED is False:
+        from msgspec import Struct
+        from pydantic import BaseModel
+
+        _MODEL_LIKE = (BaseModel, Struct)
+        _INITIALIZED = True
+
     # Validate func is a single callable
     if not callable(func):
         # If func is not callable, maybe it's an iterable. Extract one callable if possible.
@@ -82,7 +93,7 @@ async def alcall(
     else:
         if not isinstance(input_, list):
             # Attempt to iterate
-            if isinstance(input_, BaseModel):
+            if isinstance(input_, _MODEL_LIKE):
                 # Pydantic model, convert to list
                 input_ = [input_]
             else:
@@ -132,7 +143,7 @@ async def alcall(
 
     async def execute_task(i: Any, index: int) -> Any:
         attempts = 0
-        current_delay =
+        current_delay = retry_initial_delay
         while True:
             try:
                 result = await call_func(i)
@@ -212,10 +223,10 @@ async def bcall(
     output_unique: bool = False,
     output_flatten_tuple_set: bool = False,
     delay_before_start: float = 0,
-
+    retry_initial_delay: float = 0,
     retry_backoff: float = 1,
     retry_default: Any = Unset,
-    retry_timeout: float =
+    retry_timeout: float = None,
     retry_attempts: int = 0,
     max_concurrent: int | None = None,
     throttle_period: float | None = None,
@@ -237,7 +248,7 @@ async def bcall(
         output_unique=output_unique,
         output_flatten_tuple_set=output_flatten_tuple_set,
         delay_before_start=delay_before_start,
-
+        retry_initial_delay=retry_initial_delay,
         retry_backoff=retry_backoff,
         retry_default=retry_default,
         retry_timeout=retry_timeout,
@@ -268,7 +279,7 @@ class AlcallParams(Params):
 
     # retry and timeout
     delay_before_start: float
-
+    retry_initial_delay: float
    retry_backoff: float
    retry_default: Any
    retry_timeout: float
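The retry delay parameter is now spelled `retry_initial_delay` across `alcall`, `bcall`, and `AlcallParams`, and pydantic/msgspec are imported only on first call. A hedged usage sketch: the keyword names come from the diff, while the positional order `alcall(inputs, func, ...)` and the `lionagi.ln` re-export are assumptions based on the function body and the other `lionagi.ln` imports seen in this diff:

```python
import anyio

from lionagi.ln import alcall  # assumed re-export, like extract_json/to_list above


async def double(i: int) -> int:
    return i * 2


async def main():
    results = await alcall(
        list(range(5)),
        double,
        retry_attempts=2,         # per-item retries
        retry_initial_delay=0.1,  # renamed parameter in 0.17.6
        retry_backoff=2,          # multiplier applied to the delay between attempts
        max_concurrent=3,         # cap on concurrent tasks
    )
    print(results)  # e.g. [0, 2, 4, 6, 8]


anyio.run(main)
```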
lionagi/ln/_hash.py
CHANGED
@@ -3,10 +3,13 @@ from __future__ import annotations
 import copy
 
 import msgspec
-from pydantic import BaseModel as PydanticBaseModel
 
 __all__ = ("hash_dict",)
 
+# Global initialization state
+_INITIALIZED = False
+PydanticBaseModel = None
+
 # --- Canonical Representation Generator ---
 _PRIMITIVE_TYPES = (str, int, float, bool, type(None))
 _TYPE_MARKER_DICT = 0
@@ -35,7 +38,7 @@ def _generate_hashable_representation(item: any) -> any:
             _generate_hashable_representation(msgspec.to_builtins(item)),
         )
 
-    if isinstance(item, PydanticBaseModel):
+    if PydanticBaseModel and isinstance(item, PydanticBaseModel):
         # Process the Pydantic model by first dumping it to a dict, then processing that dict.
         # The type marker distinguishes this from a regular dictionary.
         return (
@@ -117,6 +120,13 @@ def _generate_hashable_representation(item: any) -> any:
 
 
 def hash_dict(data: any, strict: bool = False) -> int:
+    global _INITIALIZED, PydanticBaseModel
+    if _INITIALIZED is False:
+        from pydantic import BaseModel
+
+        PydanticBaseModel = BaseModel
+        _INITIALIZED = True
+
     data_to_process = data
     if strict:
         data_to_process = copy.deepcopy(data)
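`_hash.py`, `_to_list.py`, and `_async_call.py` all switch to the same first-call initialization idiom so that importing `lionagi.ln` no longer imports pydantic or msgspec. A standalone sketch of the idiom (illustrative, not lionagi code):

```python
# deferred_dep.py - illustrative first-call initialization pattern
_INITIALIZED = False
_HEAVY_TYPE = None


def uses_heavy_dep(obj) -> bool:
    """Return True if obj is an instance of the heavy dependency's base type."""
    global _INITIALIZED, _HEAVY_TYPE
    if not _INITIALIZED:
        # Imported on first call only, so `import deferred_dep` stays cheap.
        from pydantic import BaseModel

        _HEAVY_TYPE = BaseModel
        _INITIALIZED = True
    return isinstance(obj, _HEAVY_TYPE)
```

The trade-off is a branch and a pair of module globals on every call, in exchange for a faster import of the module itself.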
lionagi/ln/_to_list.py
CHANGED
@@ -3,24 +3,20 @@ from dataclasses import dataclass
 from enum import Enum as _Enum
 from typing import Any, ClassVar
 
-from msgspec import Struct
-from pydantic import BaseModel
-from pydantic_core import PydanticUndefinedType
-
 from ._hash import hash_dict
-from .types import Params, UndefinedType, UnsetType
+from .types import Params
 
 __all__ = ("to_list", "ToListParams")
 
 
+_INITIALIZED = False
+_MODEL_LIKE = None
+_MAP_LIKE = None
+_SINGLETONE_TYPES = None
+_SKIP_TYPE = None
+_SKIP_TUPLE_SET = None
 _BYTE_LIKE = (str, bytes, bytearray)
-_MODEL_LIKE = (BaseModel, Struct)
-_MAP_LIKE = (Mapping, *_MODEL_LIKE)
 _TUPLE_SET = (tuple, set, frozenset)
-_SINGLETONE_TYPES = (UndefinedType, UnsetType, PydanticUndefinedType)
-
-_SKIP_TYPE = (*_BYTE_LIKE, *_MAP_LIKE, _Enum)
-_SKIP_TUPLE_SET = (*_SKIP_TYPE, *_TUPLE_SET)
 
 
 def to_list(
@@ -50,6 +46,21 @@ def to_list(
     Raises:
         ValueError: If unique=True is used without flatten=True.
     """
+    global _INITIALIZED
+    if _INITIALIZED is False:
+        from msgspec import Struct
+        from pydantic import BaseModel
+        from pydantic_core import PydanticUndefinedType
+
+        from .types import UndefinedType, UnsetType
+
+        global _MODEL_LIKE, _MAP_LIKE, _SINGLETONE_TYPES, _SKIP_TYPE, _SKIP_TUPLE_SET
+        _MODEL_LIKE = (BaseModel, Struct)
+        _MAP_LIKE = (Mapping, *_MODEL_LIKE)
+        _SINGLETONE_TYPES = (UndefinedType, UnsetType, PydanticUndefinedType)
+        _SKIP_TYPE = (*_BYTE_LIKE, *_MAP_LIKE, _Enum)
+        _SKIP_TUPLE_SET = (*_SKIP_TYPE, *_TUPLE_SET)
+        _INITIALIZED = True
 
     def _process_list(
         lst: list[Any],
@@ -117,7 +128,7 @@ def to_list(
             else [input_]
         )
 
-    if isinstance(input_,
+    if isinstance(input_, _MODEL_LIKE):
        return [input_]
 
     if isinstance(input_, Iterable) and not isinstance(input_, _BYTE_LIKE):
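The type tuples (`_MODEL_LIKE`, `_MAP_LIKE`, and friends) are now built lazily inside `to_list` itself, so the function's behavior is unchanged for callers. A hedged usage sketch, assuming `flatten`, `dropna`, and `unique` behave as the docstring referenced in the diff describes:

```python
from lionagi.ln import to_list

data = [1, [2, None, [3]], None, 3]
print(to_list(data, flatten=True, dropna=True, unique=True))
# expected: [1, 2, 3]  (nested lists flattened, None dropped, duplicates removed)
```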
lionagi/ln/fuzzy/_fuzzy_match.py
CHANGED
@@ -1,8 +1,7 @@
-from collections.abc import Sequence
 from dataclasses import dataclass
 from typing import Any, ClassVar, Literal
 
-from ..types import KeysDict, Params, Unset
+from ..types import KeysLike, Params, Unset
 from ._string_similarity import (
     SIMILARITY_ALGO_MAP,
     SIMILARITY_TYPE,
@@ -21,7 +20,7 @@ HandleUnmatched = Literal["ignore", "raise", "remove", "fill", "force"]
 
 def fuzzy_match_keys(
     d_: dict[str, Any],
-    keys: Sequence[str] | KeysDict,
+    keys: KeysLike,
     /,
     *,
     similarity_algo: SIMILARITY_TYPE | SimilarityFunc = "jaro_winkler",
@@ -166,7 +165,5 @@ class FuzzyMatchKeysParams(Params):
     fill_mapping: dict[str, Any] | Any = Unset
     strict: bool = False
 
-    def __call__(
-        self, d_: dict[str, Any], keys: Sequence[str] | KeysDict
-    ) -> dict[str, Any]:
+    def __call__(self, d_: dict[str, Any], keys: KeysLike) -> dict[str, Any]:
         return fuzzy_match_keys(d_, keys, **self.default_kw())
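The key annotations collapse to the new `KeysLike` alias from `lionagi.ln.types`. A hedged usage sketch, calling `fuzzy_match_keys` only with the positional arguments shown in its signature; the corrected-output comment assumes the default similarity settings match close typos, and the import path follows the file location since any public re-export is not shown in this diff:

```python
from lionagi.ln.fuzzy._fuzzy_match import fuzzy_match_keys

data = {"naem": "Ada", "agee": 36}
fixed = fuzzy_match_keys(data, ["name", "age"])
print(fixed)  # expected: {"name": "Ada", "age": 36}
```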
lionagi/ln/fuzzy/_fuzzy_validate.py
CHANGED
@@ -1,23 +1,25 @@
-from collections.abc import Callable
-from typing import Any, Literal
-
-from pydantic import BaseModel
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any, Literal
 
 from lionagi._errors import ValidationError
 
-from ..types import
+from ..types import KeysLike
 from ._extract_json import extract_json
 from ._fuzzy_match import FuzzyMatchKeysParams, fuzzy_match_keys
 from ._string_similarity import SIMILARITY_TYPE
 from ._to_dict import to_dict
 
+if TYPE_CHECKING:
+    from pydantic import BaseModel
+
+
 __all__ = ("fuzzy_validate_pydantic",)
 
 
 def fuzzy_validate_pydantic(
     text,
     /,
-    model_type: type[BaseModel],
+    model_type: "type[BaseModel]",
     fuzzy_parse: bool = True,
     fuzzy_match: bool = False,
     fuzzy_match_params: FuzzyMatchKeysParams | dict = None,
@@ -54,7 +56,7 @@
 
 def fuzzy_validate_mapping(
     d: Any,
-    keys:
+    keys: KeysLike,
     /,
     *,
     similarity_algo: (
@@ -108,7 +110,6 @@ def fuzzy_validate_mapping(
     # Try converting to dictionary
     try:
         if isinstance(d, str):
-            # First try to_json for JSON strings and code blocks
             try:
                 json_result = extract_json(
                     d, fuzzy_parse=True, return_one_if_single=True
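With pydantic now imported only for type checking, `model_type` becomes a string annotation but the call pattern is unchanged. A hedged usage sketch based on the signature shown above (import path follows the file location; the typo-correcting behavior with `fuzzy_match=True` is the function's stated purpose, assumed here rather than shown in the diff):

```python
from pydantic import BaseModel

from lionagi.ln.fuzzy._fuzzy_validate import fuzzy_validate_pydantic


class User(BaseModel):
    name: str
    age: int


# Keys with small typos are matched against the model fields when fuzzy_match=True.
user = fuzzy_validate_pydantic('{"naem": "Ada", "age": 36}', User, fuzzy_match=True)
print(user)  # expected: name='Ada' age=36
```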
lionagi/ln/fuzzy/_string_similarity.py
CHANGED
@@ -2,11 +2,13 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from collections.abc import Callable, Sequence
+from collections.abc import Callable
 from dataclasses import dataclass
-from difflib import SequenceMatcher
-from itertools import product
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
 
 __all__ = ("string_similarity",)
 
@@ -157,6 +159,8 @@ def levenshtein_distance(a: str, b: str) -> int:
         int: Minimum number of single-character edits needed to change one
             string into the other
     """
+    from itertools import product
+
     if not a:
         return len(b)
     if not b:
@@ -213,6 +217,8 @@ def sequence_matcher_similarity(s1: str, s2: str) -> float:
     Returns:
         float: Similarity score between 0 and 1
     """
+    from difflib import SequenceMatcher
+
     return SequenceMatcher(None, s1, s2).ratio()
 
 
@@ -249,7 +255,7 @@ class MatchResult:
 
 def string_similarity(
     word: str,
-    correct_words: Sequence[str],
+    correct_words: "Sequence[str]",
     algorithm: SIMILARITY_TYPE | Callable[[str, str], float] = "jaro_winkler",
     threshold: float = 0.0,
     case_sensitive: bool = False,