lionagi 0.16.1__py3-none-any.whl → 0.16.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionagi/adapters/_utils.py +0 -14
- lionagi/libs/file/save.py +8 -1
- lionagi/ln/__init__.py +10 -0
- lionagi/ln/_json_dump.py +322 -49
- lionagi/ln/fuzzy/__init__.py +4 -1
- lionagi/ln/fuzzy/_fuzzy_validate.py +109 -0
- lionagi/ln/fuzzy/_to_dict.py +388 -0
- lionagi/models/__init__.py +0 -2
- lionagi/operations/brainstorm/brainstorm.py +10 -10
- lionagi/operations/communicate/communicate.py +1 -1
- lionagi/operations/parse/parse.py +1 -1
- lionagi/protocols/generic/element.py +5 -14
- lionagi/protocols/generic/log.py +2 -2
- lionagi/protocols/generic/pile.py +1 -1
- lionagi/protocols/messages/message.py +8 -1
- lionagi/protocols/operatives/operative.py +2 -2
- lionagi/service/connections/endpoint.py +7 -0
- lionagi/service/connections/match_endpoint.py +2 -10
- lionagi/service/connections/providers/types.py +1 -3
- lionagi/service/hooks/hook_event.py +1 -1
- lionagi/service/hooks/hook_registry.py +1 -1
- lionagi/service/rate_limited_processor.py +1 -1
- lionagi/session/branch.py +1 -101
- lionagi/session/session.py +9 -14
- lionagi/utils.py +3 -334
- lionagi/version.py +1 -1
- {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/METADATA +3 -13
- {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/RECORD +30 -78
- lionagi/adapters/postgres_model_adapter.py +0 -131
- lionagi/libs/concurrency.py +0 -1
- lionagi/libs/file/params.py +0 -175
- lionagi/libs/nested/__init__.py +0 -3
- lionagi/libs/nested/flatten.py +0 -172
- lionagi/libs/nested/nfilter.py +0 -59
- lionagi/libs/nested/nget.py +0 -45
- lionagi/libs/nested/ninsert.py +0 -104
- lionagi/libs/nested/nmerge.py +0 -158
- lionagi/libs/nested/npop.py +0 -69
- lionagi/libs/nested/nset.py +0 -94
- lionagi/libs/nested/unflatten.py +0 -83
- lionagi/libs/nested/utils.py +0 -189
- lionagi/libs/parse.py +0 -31
- lionagi/libs/schema/json_schema.py +0 -231
- lionagi/libs/token_transform/__init__.py +0 -0
- lionagi/libs/token_transform/base.py +0 -54
- lionagi/libs/token_transform/llmlingua.py +0 -1
- lionagi/libs/token_transform/perplexity.py +0 -450
- lionagi/libs/token_transform/symbolic_compress_context.py +0 -152
- lionagi/libs/token_transform/synthlang.py +0 -9
- lionagi/libs/token_transform/synthlang_/base.py +0 -128
- lionagi/libs/token_transform/synthlang_/resources/frameworks/abstract_algebra.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/category_theory.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/complex_analysis.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/framework_options.json +0 -52
- lionagi/libs/token_transform/synthlang_/resources/frameworks/group_theory.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/math_logic.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/reflective_patterns.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/set_theory.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/frameworks/topology_fundamentals.toml +0 -11
- lionagi/libs/token_transform/synthlang_/resources/mapping/lion_emoji_mapping.toml +0 -61
- lionagi/libs/token_transform/synthlang_/resources/mapping/python_math_mapping.toml +0 -41
- lionagi/libs/token_transform/synthlang_/resources/mapping/rust_chinese_mapping.toml +0 -60
- lionagi/libs/token_transform/synthlang_/resources/utility/base_synthlang_system_prompt.toml +0 -11
- lionagi/libs/token_transform/synthlang_/translate_to_synthlang.py +0 -140
- lionagi/libs/token_transform/types.py +0 -15
- lionagi/libs/unstructured/__init__.py +0 -0
- lionagi/libs/unstructured/pdf_to_image.py +0 -45
- lionagi/libs/unstructured/read_image_to_base64.py +0 -33
- lionagi/libs/validate/fuzzy_match_keys.py +0 -7
- lionagi/libs/validate/fuzzy_validate_mapping.py +0 -144
- lionagi/libs/validate/string_similarity.py +0 -7
- lionagi/libs/validate/xml_parser.py +0 -203
- lionagi/models/note.py +0 -383
- lionagi/operations/translate/__init__.py +0 -0
- lionagi/operations/translate/translate.py +0 -47
- lionagi/service/connections/providers/claude_code_.py +0 -294
- lionagi/tools/memory/tools.py +0 -495
- {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/WHEEL +0 -0
- {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/licenses/LICENSE +0 -0
lionagi/adapters/_utils.py
CHANGED
@@ -10,17 +10,3 @@ def check_async_postgres_available():
|
|
10
10
|
"This adapter requires postgres option to be installed. "
|
11
11
|
'Please install them using `uv pip install "lionagi[postgres]"`.'
|
12
12
|
)
|
13
|
-
|
14
|
-
|
15
|
-
def check_postgres_available():
|
16
|
-
try:
|
17
|
-
from pydapter.model_adapters.postgres_model import PostgresModelAdapter
|
18
|
-
from sqlalchemy import String
|
19
|
-
from sqlalchemy.orm import DeclarativeBase
|
20
|
-
|
21
|
-
return True
|
22
|
-
except Exception:
|
23
|
-
return ImportError(
|
24
|
-
"This adapter requires postgres option to be installed. "
|
25
|
-
'Please install them using `uv pip install "lionagi[postgres]"`.'
|
26
|
-
)
|
lionagi/libs/file/save.py
CHANGED
@@ -89,7 +89,14 @@ def save_chunks(
|
|
89
89
|
random_hash_digits=random_hash_digits,
|
90
90
|
)
|
91
91
|
save_to_file(
|
92
|
-
ln.json_dumps(
|
92
|
+
ln.json_dumps(
|
93
|
+
chunk,
|
94
|
+
pretty=True,
|
95
|
+
sort_keys=True,
|
96
|
+
append_newline=True,
|
97
|
+
deterministic_sets=True,
|
98
|
+
decimal_as_float=True,
|
99
|
+
),
|
93
100
|
directory=file_path.parent,
|
94
101
|
filename=file_path.name,
|
95
102
|
verbose=verbose,
|
lionagi/ln/__init__.py
CHANGED
@@ -4,7 +4,10 @@ from ._json_dump import (
|
|
4
4
|
DEFAULT_SERIALIZER,
|
5
5
|
DEFAULT_SERIALIZER_OPTION,
|
6
6
|
get_orjson_default,
|
7
|
+
json_dumpb,
|
7
8
|
json_dumps,
|
9
|
+
json_lines_iter,
|
10
|
+
make_options,
|
8
11
|
)
|
9
12
|
from ._list_call import lcall
|
10
13
|
from ._to_list import to_list
|
@@ -34,8 +37,10 @@ from .fuzzy import (
|
|
34
37
|
extract_json,
|
35
38
|
fuzzy_json,
|
36
39
|
fuzzy_match_keys,
|
40
|
+
fuzzy_validate_mapping,
|
37
41
|
fuzzy_validate_pydantic,
|
38
42
|
string_similarity,
|
43
|
+
to_dict,
|
39
44
|
)
|
40
45
|
from .types import is_sentinel, not_sentinel
|
41
46
|
|
@@ -47,6 +52,9 @@ __all__ = (
|
|
47
52
|
"DEFAULT_SERIALIZER_OPTION",
|
48
53
|
"get_orjson_default",
|
49
54
|
"json_dumps",
|
55
|
+
"make_options",
|
56
|
+
"json_dumpb",
|
57
|
+
"json_lines_iter",
|
50
58
|
"lcall",
|
51
59
|
"to_list",
|
52
60
|
"acreate_path",
|
@@ -74,4 +82,6 @@ __all__ = (
|
|
74
82
|
"string_similarity",
|
75
83
|
"is_sentinel",
|
76
84
|
"not_sentinel",
|
85
|
+
"to_dict",
|
86
|
+
"fuzzy_validate_mapping",
|
77
87
|
)
|
lionagi/ln/_json_dump.py
CHANGED
@@ -1,75 +1,348 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
import datetime as dt
|
2
4
|
import decimal
|
3
|
-
|
5
|
+
import re
|
6
|
+
from collections.abc import Callable, Iterable, Mapping
|
7
|
+
from enum import Enum
|
8
|
+
from functools import lru_cache
|
4
9
|
from pathlib import Path
|
10
|
+
from textwrap import shorten
|
11
|
+
from typing import Any
|
5
12
|
from uuid import UUID
|
6
13
|
|
7
14
|
import orjson
|
8
15
|
|
16
|
+
__all__ = [
|
17
|
+
"get_orjson_default",
|
18
|
+
"DEFAULT_SERIALIZER",
|
19
|
+
"DEFAULT_SERIALIZER_OPTION",
|
20
|
+
"make_options",
|
21
|
+
"json_dumpb",
|
22
|
+
"json_dumps",
|
23
|
+
"json_lines_iter",
|
24
|
+
]
|
25
|
+
|
26
|
+
# Types orjson already serializes natively at C/Rust speed.
|
27
|
+
# (We only route them through default() when passthrough is requested.)
|
28
|
+
_NATIVE = (dt.datetime, dt.date, dt.time, UUID)
|
29
|
+
|
30
|
+
# --------- helpers ------------------------------------------------------------
|
31
|
+
|
32
|
+
_ADDR_PAT = re.compile(r" at 0x[0-9A-Fa-f]+")
|
33
|
+
|
34
|
+
|
35
|
+
def _clip(s: str, limit: int = 2048) -> str:
|
36
|
+
return shorten(s, width=limit, placeholder=f"...(+{len(s) - limit} chars)") # type: ignore[arg-type]
|
37
|
+
|
38
|
+
|
39
|
+
def _normalize_for_sorting(x: Any) -> str:
|
40
|
+
"""Normalize repr/str to remove process-specific addresses."""
|
41
|
+
s = str(x)
|
42
|
+
return _ADDR_PAT.sub(" at 0x?", s)
|
43
|
+
|
44
|
+
|
45
|
+
def _stable_sorted_iterable(o: Iterable[Any]) -> list[Any]:
|
46
|
+
"""
|
47
|
+
Deterministic ordering for sets (including mixed types).
|
48
|
+
Key: (class name, normalized str) avoids comparisons across unlike types
|
49
|
+
and removes memory address variance in default reprs.
|
50
|
+
"""
|
51
|
+
return sorted(
|
52
|
+
o, key=lambda x: (x.__class__.__name__, _normalize_for_sorting(x))
|
53
|
+
)
|
54
|
+
|
55
|
+
|
56
|
+
def _safe_exception_payload(ex: Exception) -> dict[str, str]:
|
57
|
+
return {"type": ex.__class__.__name__, "message": str(ex)}
|
9
58
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
59
|
+
|
60
|
+
def _default_serializers(
|
61
|
+
deterministic_sets: bool,
|
62
|
+
decimal_as_float: bool,
|
63
|
+
enum_as_name: bool,
|
64
|
+
passthrough_datetime: bool,
|
65
|
+
) -> dict[type, Callable[[Any], Any]]:
|
66
|
+
ser: dict[type, Callable[[Any], Any]] = {
|
67
|
+
Path: str,
|
68
|
+
decimal.Decimal: (float if decimal_as_float else str),
|
69
|
+
set: (_stable_sorted_iterable if deterministic_sets else list),
|
70
|
+
frozenset: (_stable_sorted_iterable if deterministic_sets else list),
|
18
71
|
}
|
72
|
+
if enum_as_name:
|
73
|
+
ser[Enum] = lambda e: e.name
|
74
|
+
# Only needed if you also set OPT_PASSTHROUGH_DATETIME via options.
|
75
|
+
if passthrough_datetime:
|
76
|
+
ser[dt.datetime] = lambda o: o.isoformat()
|
77
|
+
return ser
|
19
78
|
|
20
79
|
|
21
|
-
|
22
|
-
return [dt.datetime, Path, UUID, decimal.Decimal, set, frozenset]
|
80
|
+
# --------- default() factory --------------------------------------------------
|
23
81
|
|
24
82
|
|
25
83
|
def get_orjson_default(
|
26
|
-
|
27
|
-
|
84
|
+
*,
|
85
|
+
order: list[type] | None = None,
|
86
|
+
additional: Mapping[type, Callable[[Any], Any]] | None = None,
|
28
87
|
extend_default: bool = True,
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
88
|
+
deterministic_sets: bool = False,
|
89
|
+
decimal_as_float: bool = False,
|
90
|
+
enum_as_name: bool = False,
|
91
|
+
passthrough_datetime: bool = False,
|
92
|
+
safe_fallback: bool = False,
|
93
|
+
fallback_clip: int = 2048,
|
94
|
+
) -> Callable[[Any], Any]:
|
35
95
|
"""
|
36
|
-
|
37
|
-
|
38
|
-
|
96
|
+
Build a fast, extensible `default=` callable for orjson.dumps.
|
97
|
+
|
98
|
+
- deterministic_sets: sort set/frozenset deterministically (slower).
|
99
|
+
- decimal_as_float: serialize Decimal as float (faster/smaller; precision loss).
|
100
|
+
- enum_as_name: serialize Enum as .name (else orjson uses .value by default).
|
101
|
+
- passthrough_datetime: if True, also pass OPT_PASSTHROUGH_DATETIME in options.
|
102
|
+
- safe_fallback: if True, unknown objects never raise (for logs);
|
103
|
+
Exceptions become a tiny dict; all else becomes clipped repr(str).
|
104
|
+
|
105
|
+
'order' and 'additional' preserve your override semantics.
|
106
|
+
"""
|
107
|
+
ser = _default_serializers(
|
108
|
+
deterministic_sets=deterministic_sets,
|
109
|
+
decimal_as_float=decimal_as_float,
|
110
|
+
enum_as_name=enum_as_name,
|
111
|
+
passthrough_datetime=passthrough_datetime,
|
112
|
+
)
|
113
|
+
if additional:
|
114
|
+
ser.update(additional)
|
115
|
+
|
116
|
+
base_order: list[type] = [Path, decimal.Decimal, set, frozenset]
|
117
|
+
if enum_as_name:
|
118
|
+
base_order.insert(0, Enum)
|
119
|
+
if passthrough_datetime:
|
120
|
+
base_order.insert(0, dt.datetime)
|
39
121
|
|
40
122
|
if order:
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
123
|
+
order_ = (
|
124
|
+
(base_order + [t for t in order if t not in base_order])
|
125
|
+
if extend_default
|
126
|
+
else list(order)
|
127
|
+
)
|
45
128
|
else:
|
46
|
-
|
47
|
-
|
129
|
+
order_ = base_order.copy()
|
130
|
+
|
131
|
+
if not passthrough_datetime:
|
132
|
+
# Avoid checks for types already on the orjson native fast path.
|
133
|
+
order_ = [t for t in order_ if t not in _NATIVE]
|
134
|
+
|
135
|
+
order_tuple = tuple(order_)
|
136
|
+
cache: dict[type, Callable[[Any], Any]] = {}
|
48
137
|
|
49
|
-
def default(obj):
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
138
|
+
def default(obj: Any) -> Any:
|
139
|
+
typ = obj.__class__
|
140
|
+
func = cache.get(typ)
|
141
|
+
if func is None:
|
142
|
+
for T in order_tuple:
|
143
|
+
if issubclass(typ, T):
|
144
|
+
f = ser.get(T)
|
145
|
+
if f:
|
146
|
+
cache[typ] = f
|
147
|
+
func = f
|
148
|
+
break
|
149
|
+
else:
|
150
|
+
# Duck-typed support for common data holders
|
151
|
+
md = getattr(obj, "model_dump", None)
|
152
|
+
if callable(md):
|
153
|
+
try:
|
154
|
+
return md()
|
155
|
+
except Exception:
|
156
|
+
pass
|
157
|
+
dd = getattr(obj, "dict", None)
|
158
|
+
if callable(dd):
|
159
|
+
try:
|
160
|
+
return dd()
|
161
|
+
except Exception:
|
162
|
+
pass
|
163
|
+
if safe_fallback:
|
164
|
+
if isinstance(obj, Exception):
|
165
|
+
return _safe_exception_payload(obj)
|
166
|
+
return _clip(repr(obj), fallback_clip)
|
167
|
+
raise TypeError(
|
168
|
+
f"Type is not JSON serializable: {typ.__name__}"
|
169
|
+
)
|
170
|
+
return func(obj)
|
54
171
|
|
55
172
|
return default
|
56
173
|
|
57
174
|
|
175
|
+
@lru_cache(maxsize=128)
|
176
|
+
def _cached_default(
|
177
|
+
deterministic_sets: bool,
|
178
|
+
decimal_as_float: bool,
|
179
|
+
enum_as_name: bool,
|
180
|
+
passthrough_datetime: bool,
|
181
|
+
safe_fallback: bool,
|
182
|
+
fallback_clip: int,
|
183
|
+
):
|
184
|
+
return get_orjson_default(
|
185
|
+
deterministic_sets=deterministic_sets,
|
186
|
+
decimal_as_float=decimal_as_float,
|
187
|
+
enum_as_name=enum_as_name,
|
188
|
+
passthrough_datetime=passthrough_datetime,
|
189
|
+
safe_fallback=safe_fallback,
|
190
|
+
fallback_clip=fallback_clip,
|
191
|
+
)
|
192
|
+
|
193
|
+
|
194
|
+
# --------- defaults & options -------------------------------------------------
|
195
|
+
|
196
|
+
# Compact, no newline, no sorting: neutral default for most use-cases.
|
197
|
+
DEFAULT_SERIALIZER_OPTION = 0
|
58
198
|
DEFAULT_SERIALIZER = get_orjson_default()
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
199
|
+
|
200
|
+
|
201
|
+
def make_options(
|
202
|
+
*,
|
203
|
+
pretty: bool = False,
|
204
|
+
sort_keys: bool = False,
|
205
|
+
naive_utc: bool = False,
|
206
|
+
utc_z: bool = False,
|
207
|
+
append_newline: bool = False,
|
208
|
+
passthrough_datetime: bool = False,
|
209
|
+
allow_non_str_keys: bool = False,
|
210
|
+
) -> int:
|
211
|
+
"""
|
212
|
+
Compose orjson 'option' bit flags succinctly.
|
213
|
+
"""
|
214
|
+
opt = 0
|
215
|
+
if append_newline:
|
216
|
+
opt |= orjson.OPT_APPEND_NEWLINE
|
217
|
+
if pretty:
|
218
|
+
opt |= orjson.OPT_INDENT_2
|
219
|
+
if sort_keys:
|
220
|
+
opt |= orjson.OPT_SORT_KEYS
|
221
|
+
if naive_utc:
|
222
|
+
opt |= orjson.OPT_NAIVE_UTC
|
223
|
+
if utc_z:
|
224
|
+
opt |= orjson.OPT_UTC_Z
|
225
|
+
if passthrough_datetime:
|
226
|
+
opt |= orjson.OPT_PASSTHROUGH_DATETIME
|
227
|
+
if allow_non_str_keys:
|
228
|
+
opt |= orjson.OPT_NON_STR_KEYS
|
229
|
+
return opt
|
230
|
+
|
231
|
+
|
232
|
+
# --------- dump helpers -------------------------------------------------------
|
233
|
+
|
234
|
+
|
235
|
+
def json_dumpb(
|
236
|
+
obj: Any,
|
237
|
+
*,
|
238
|
+
pretty: bool = False,
|
239
|
+
sort_keys: bool = False,
|
240
|
+
naive_utc: bool = False,
|
241
|
+
utc_z: bool = False,
|
242
|
+
append_newline: bool = False,
|
243
|
+
allow_non_str_keys: bool = False,
|
244
|
+
deterministic_sets: bool = False,
|
245
|
+
decimal_as_float: bool = False,
|
246
|
+
enum_as_name: bool = False,
|
247
|
+
passthrough_datetime: bool = False,
|
248
|
+
safe_fallback: bool = False,
|
249
|
+
fallback_clip: int = 2048,
|
250
|
+
default: Callable[[Any], Any] | None = None,
|
251
|
+
options: int | None = None,
|
252
|
+
) -> bytes:
|
253
|
+
"""
|
254
|
+
Serialize to **bytes** (fast path). Prefer this in hot code.
|
255
|
+
|
256
|
+
Notes:
|
257
|
+
- If you set passthrough_datetime=True, you likely also want it in options.
|
258
|
+
- safe_fallback=True is recommended for LOGGING ONLY.
|
259
|
+
"""
|
260
|
+
if default is None:
|
261
|
+
default = _cached_default(
|
262
|
+
deterministic_sets=deterministic_sets,
|
263
|
+
decimal_as_float=decimal_as_float,
|
264
|
+
enum_as_name=enum_as_name,
|
265
|
+
passthrough_datetime=passthrough_datetime,
|
266
|
+
safe_fallback=safe_fallback,
|
267
|
+
fallback_clip=fallback_clip,
|
268
|
+
)
|
269
|
+
opt = (
|
270
|
+
options
|
271
|
+
if options is not None
|
272
|
+
else make_options(
|
273
|
+
pretty=pretty,
|
274
|
+
sort_keys=sort_keys,
|
275
|
+
naive_utc=naive_utc,
|
276
|
+
utc_z=utc_z,
|
277
|
+
append_newline=append_newline,
|
278
|
+
passthrough_datetime=passthrough_datetime,
|
279
|
+
allow_non_str_keys=allow_non_str_keys,
|
280
|
+
)
|
72
281
|
)
|
73
|
-
|
74
|
-
|
75
|
-
|
282
|
+
return orjson.dumps(obj, default=default, option=opt)
|
283
|
+
|
284
|
+
|
285
|
+
def json_dumps(
|
286
|
+
obj: Any,
|
287
|
+
/,
|
288
|
+
*,
|
289
|
+
decode: bool = True,
|
290
|
+
**kwargs: Any,
|
291
|
+
) -> str | bytes:
|
292
|
+
"""
|
293
|
+
Serialize to str by default (decode=True), or bytes if decode=False.
|
294
|
+
"""
|
295
|
+
out = json_dumpb(obj, **kwargs)
|
296
|
+
return out.decode("utf-8") if decode else out
|
297
|
+
|
298
|
+
|
299
|
+
# --------- streaming for very large outputs ----------------------------------
|
300
|
+
|
301
|
+
|
302
|
+
def json_lines_iter(
|
303
|
+
it: Iterable[Any],
|
304
|
+
*,
|
305
|
+
# default() configuration for each line
|
306
|
+
deterministic_sets: bool = False,
|
307
|
+
decimal_as_float: bool = False,
|
308
|
+
enum_as_name: bool = False,
|
309
|
+
passthrough_datetime: bool = False,
|
310
|
+
safe_fallback: bool = False,
|
311
|
+
fallback_clip: int = 2048,
|
312
|
+
# options
|
313
|
+
naive_utc: bool = False,
|
314
|
+
utc_z: bool = False,
|
315
|
+
allow_non_str_keys: bool = False,
|
316
|
+
# advanced
|
317
|
+
default: Callable[[Any], Any] | None = None,
|
318
|
+
options: int | None = None,
|
319
|
+
) -> Iterable[bytes]:
|
320
|
+
"""
|
321
|
+
Stream an iterable as **NDJSON** (one JSON object per line) in **bytes**.
|
322
|
+
|
323
|
+
Always ensures a trailing newline per line (OPT_APPEND_NEWLINE).
|
324
|
+
"""
|
325
|
+
if default is None:
|
326
|
+
default = _cached_default(
|
327
|
+
deterministic_sets=deterministic_sets,
|
328
|
+
decimal_as_float=decimal_as_float,
|
329
|
+
enum_as_name=enum_as_name,
|
330
|
+
passthrough_datetime=passthrough_datetime,
|
331
|
+
safe_fallback=safe_fallback,
|
332
|
+
fallback_clip=fallback_clip,
|
333
|
+
)
|
334
|
+
if options is None:
|
335
|
+
opt = make_options(
|
336
|
+
pretty=False,
|
337
|
+
sort_keys=False,
|
338
|
+
naive_utc=naive_utc,
|
339
|
+
utc_z=utc_z,
|
340
|
+
append_newline=True, # enforce newline for NDJSON
|
341
|
+
passthrough_datetime=passthrough_datetime,
|
342
|
+
allow_non_str_keys=allow_non_str_keys,
|
343
|
+
)
|
344
|
+
else:
|
345
|
+
opt = options | orjson.OPT_APPEND_NEWLINE
|
346
|
+
|
347
|
+
for item in it:
|
348
|
+
yield orjson.dumps(item, default=default, option=opt)
|
lionagi/ln/fuzzy/__init__.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
from ._extract_json import extract_json
|
2
2
|
from ._fuzzy_json import fuzzy_json
|
3
3
|
from ._fuzzy_match import FuzzyMatchKeysParams, fuzzy_match_keys
|
4
|
-
from ._fuzzy_validate import fuzzy_validate_pydantic
|
4
|
+
from ._fuzzy_validate import fuzzy_validate_mapping, fuzzy_validate_pydantic
|
5
5
|
from ._string_similarity import SIMILARITY_TYPE, string_similarity
|
6
|
+
from ._to_dict import to_dict
|
6
7
|
|
7
8
|
__all__ = (
|
9
|
+
"to_dict",
|
8
10
|
"fuzzy_json",
|
9
11
|
"fuzzy_match_keys",
|
10
12
|
"extract_json",
|
@@ -12,4 +14,5 @@ __all__ = (
|
|
12
14
|
"SIMILARITY_TYPE",
|
13
15
|
"fuzzy_validate_pydantic",
|
14
16
|
"FuzzyMatchKeysParams",
|
17
|
+
"fuzzy_validate_mapping",
|
15
18
|
)
|
lionagi/ln/fuzzy/_fuzzy_validate.py
CHANGED
@@ -1,9 +1,15 @@
|
|
1
|
+
from collections.abc import Callable, Sequence
|
2
|
+
from typing import Any, Literal
|
3
|
+
|
1
4
|
from pydantic import BaseModel
|
2
5
|
|
3
6
|
from lionagi._errors import ValidationError
|
4
7
|
|
8
|
+
from ..types import KeysDict
|
5
9
|
from ._extract_json import extract_json
|
6
10
|
from ._fuzzy_match import FuzzyMatchKeysParams, fuzzy_match_keys
|
11
|
+
from ._string_similarity import SIMILARITY_TYPE
|
12
|
+
from ._to_dict import to_dict
|
7
13
|
|
8
14
|
__all__ = ("fuzzy_validate_pydantic",)
|
9
15
|
|
@@ -44,3 +50,106 @@ def fuzzy_validate_pydantic(
|
|
44
50
|
return model_type.model_validate(model_data)
|
45
51
|
except Exception as e:
|
46
52
|
raise ValidationError(f"Validation failed: {e}") from e
|
53
|
+
|
54
|
+
|
55
|
+
def fuzzy_validate_mapping(
|
56
|
+
d: Any,
|
57
|
+
keys: Sequence[str] | KeysDict,
|
58
|
+
/,
|
59
|
+
*,
|
60
|
+
similarity_algo: (
|
61
|
+
SIMILARITY_TYPE | Callable[[str, str], float]
|
62
|
+
) = "jaro_winkler",
|
63
|
+
similarity_threshold: float = 0.85,
|
64
|
+
fuzzy_match: bool = True,
|
65
|
+
handle_unmatched: Literal[
|
66
|
+
"ignore", "raise", "remove", "fill", "force"
|
67
|
+
] = "ignore",
|
68
|
+
fill_value: Any = None,
|
69
|
+
fill_mapping: dict[str, Any] | None = None,
|
70
|
+
strict: bool = False,
|
71
|
+
suppress_conversion_errors: bool = False,
|
72
|
+
) -> dict[str, Any]:
|
73
|
+
"""
|
74
|
+
Validate and correct any input into a dictionary with expected keys.
|
75
|
+
|
76
|
+
Args:
|
77
|
+
d: Input to validate. Can be:
|
78
|
+
- Dictionary
|
79
|
+
- JSON string or markdown code block
|
80
|
+
- XML string
|
81
|
+
- Object with to_dict/model_dump method
|
82
|
+
- Any type convertible to dictionary
|
83
|
+
keys: List of expected keys or dictionary mapping keys to types.
|
84
|
+
similarity_algo: String similarity algorithm or custom function.
|
85
|
+
similarity_threshold: Minimum similarity score for fuzzy matching.
|
86
|
+
fuzzy_match: If True, use fuzzy matching for key correction.
|
87
|
+
handle_unmatched: How to handle unmatched keys:
|
88
|
+
- "ignore": Keep unmatched keys
|
89
|
+
- "raise": Raise error for unmatched keys
|
90
|
+
- "remove": Remove unmatched keys
|
91
|
+
- "fill": Fill missing keys with default values
|
92
|
+
- "force": Combine "fill" and "remove" behaviors
|
93
|
+
fill_value: Default value for filling unmatched keys.
|
94
|
+
fill_mapping: Dictionary mapping keys to default values.
|
95
|
+
strict: Raise error if any expected key is missing.
|
96
|
+
suppress_conversion_errors: Return empty dict on conversion errors.
|
97
|
+
|
98
|
+
Returns:
|
99
|
+
Validated and corrected dictionary.
|
100
|
+
|
101
|
+
Raises:
|
102
|
+
ValueError: If input cannot be converted or validation fails.
|
103
|
+
TypeError: If input types are invalid.
|
104
|
+
"""
|
105
|
+
if d is None:
|
106
|
+
raise TypeError("Input cannot be None")
|
107
|
+
|
108
|
+
# Try converting to dictionary
|
109
|
+
try:
|
110
|
+
if isinstance(d, str):
|
111
|
+
# First try to_json for JSON strings and code blocks
|
112
|
+
try:
|
113
|
+
json_result = extract_json(
|
114
|
+
d, fuzzy_parse=True, return_one_if_single=True
|
115
|
+
)
|
116
|
+
dict_input = (
|
117
|
+
json_result[0]
|
118
|
+
if isinstance(json_result, list)
|
119
|
+
else json_result
|
120
|
+
)
|
121
|
+
except Exception:
|
122
|
+
dict_input = to_dict(
|
123
|
+
d, str_type="json", fuzzy_parse=True, suppress=True
|
124
|
+
)
|
125
|
+
else:
|
126
|
+
dict_input = to_dict(
|
127
|
+
d, use_model_dump=True, fuzzy_parse=True, suppress=True
|
128
|
+
)
|
129
|
+
|
130
|
+
if not isinstance(dict_input, dict):
|
131
|
+
if suppress_conversion_errors:
|
132
|
+
dict_input = {}
|
133
|
+
else:
|
134
|
+
raise ValueError(
|
135
|
+
f"Failed to convert input to dictionary: {type(dict_input)}"
|
136
|
+
)
|
137
|
+
|
138
|
+
except Exception as e:
|
139
|
+
if suppress_conversion_errors:
|
140
|
+
dict_input = {}
|
141
|
+
else:
|
142
|
+
raise ValueError(f"Failed to convert input to dictionary: {e}")
|
143
|
+
|
144
|
+
# Validate the dictionary
|
145
|
+
return fuzzy_match_keys(
|
146
|
+
dict_input,
|
147
|
+
keys,
|
148
|
+
similarity_algo=similarity_algo,
|
149
|
+
similarity_threshold=similarity_threshold,
|
150
|
+
fuzzy_match=fuzzy_match,
|
151
|
+
handle_unmatched=handle_unmatched,
|
152
|
+
fill_value=fill_value,
|
153
|
+
fill_mapping=fill_mapping,
|
154
|
+
strict=strict,
|
155
|
+
)
|