lionagi 0.16.1__py3-none-any.whl → 0.16.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. lionagi/adapters/_utils.py +0 -14
  2. lionagi/libs/file/save.py +8 -1
  3. lionagi/ln/__init__.py +10 -0
  4. lionagi/ln/_json_dump.py +322 -49
  5. lionagi/ln/fuzzy/__init__.py +4 -1
  6. lionagi/ln/fuzzy/_fuzzy_validate.py +109 -0
  7. lionagi/ln/fuzzy/_to_dict.py +388 -0
  8. lionagi/models/__init__.py +0 -2
  9. lionagi/operations/brainstorm/brainstorm.py +10 -10
  10. lionagi/operations/communicate/communicate.py +1 -1
  11. lionagi/operations/parse/parse.py +1 -1
  12. lionagi/protocols/generic/element.py +5 -14
  13. lionagi/protocols/generic/log.py +2 -2
  14. lionagi/protocols/generic/pile.py +1 -1
  15. lionagi/protocols/messages/message.py +8 -1
  16. lionagi/protocols/operatives/operative.py +2 -2
  17. lionagi/service/connections/endpoint.py +7 -0
  18. lionagi/service/connections/match_endpoint.py +2 -10
  19. lionagi/service/connections/providers/types.py +1 -3
  20. lionagi/service/hooks/hook_event.py +1 -1
  21. lionagi/service/hooks/hook_registry.py +1 -1
  22. lionagi/service/rate_limited_processor.py +1 -1
  23. lionagi/session/branch.py +1 -101
  24. lionagi/session/session.py +9 -14
  25. lionagi/utils.py +3 -334
  26. lionagi/version.py +1 -1
  27. {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/METADATA +3 -13
  28. {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/RECORD +30 -78
  29. lionagi/adapters/postgres_model_adapter.py +0 -131
  30. lionagi/libs/concurrency.py +0 -1
  31. lionagi/libs/file/params.py +0 -175
  32. lionagi/libs/nested/__init__.py +0 -3
  33. lionagi/libs/nested/flatten.py +0 -172
  34. lionagi/libs/nested/nfilter.py +0 -59
  35. lionagi/libs/nested/nget.py +0 -45
  36. lionagi/libs/nested/ninsert.py +0 -104
  37. lionagi/libs/nested/nmerge.py +0 -158
  38. lionagi/libs/nested/npop.py +0 -69
  39. lionagi/libs/nested/nset.py +0 -94
  40. lionagi/libs/nested/unflatten.py +0 -83
  41. lionagi/libs/nested/utils.py +0 -189
  42. lionagi/libs/parse.py +0 -31
  43. lionagi/libs/schema/json_schema.py +0 -231
  44. lionagi/libs/token_transform/__init__.py +0 -0
  45. lionagi/libs/token_transform/base.py +0 -54
  46. lionagi/libs/token_transform/llmlingua.py +0 -1
  47. lionagi/libs/token_transform/perplexity.py +0 -450
  48. lionagi/libs/token_transform/symbolic_compress_context.py +0 -152
  49. lionagi/libs/token_transform/synthlang.py +0 -9
  50. lionagi/libs/token_transform/synthlang_/base.py +0 -128
  51. lionagi/libs/token_transform/synthlang_/resources/frameworks/abstract_algebra.toml +0 -11
  52. lionagi/libs/token_transform/synthlang_/resources/frameworks/category_theory.toml +0 -11
  53. lionagi/libs/token_transform/synthlang_/resources/frameworks/complex_analysis.toml +0 -11
  54. lionagi/libs/token_transform/synthlang_/resources/frameworks/framework_options.json +0 -52
  55. lionagi/libs/token_transform/synthlang_/resources/frameworks/group_theory.toml +0 -11
  56. lionagi/libs/token_transform/synthlang_/resources/frameworks/math_logic.toml +0 -11
  57. lionagi/libs/token_transform/synthlang_/resources/frameworks/reflective_patterns.toml +0 -11
  58. lionagi/libs/token_transform/synthlang_/resources/frameworks/set_theory.toml +0 -11
  59. lionagi/libs/token_transform/synthlang_/resources/frameworks/topology_fundamentals.toml +0 -11
  60. lionagi/libs/token_transform/synthlang_/resources/mapping/lion_emoji_mapping.toml +0 -61
  61. lionagi/libs/token_transform/synthlang_/resources/mapping/python_math_mapping.toml +0 -41
  62. lionagi/libs/token_transform/synthlang_/resources/mapping/rust_chinese_mapping.toml +0 -60
  63. lionagi/libs/token_transform/synthlang_/resources/utility/base_synthlang_system_prompt.toml +0 -11
  64. lionagi/libs/token_transform/synthlang_/translate_to_synthlang.py +0 -140
  65. lionagi/libs/token_transform/types.py +0 -15
  66. lionagi/libs/unstructured/__init__.py +0 -0
  67. lionagi/libs/unstructured/pdf_to_image.py +0 -45
  68. lionagi/libs/unstructured/read_image_to_base64.py +0 -33
  69. lionagi/libs/validate/fuzzy_match_keys.py +0 -7
  70. lionagi/libs/validate/fuzzy_validate_mapping.py +0 -144
  71. lionagi/libs/validate/string_similarity.py +0 -7
  72. lionagi/libs/validate/xml_parser.py +0 -203
  73. lionagi/models/note.py +0 -383
  74. lionagi/operations/translate/__init__.py +0 -0
  75. lionagi/operations/translate/translate.py +0 -47
  76. lionagi/service/connections/providers/claude_code_.py +0 -294
  77. lionagi/tools/memory/tools.py +0 -495
  78. {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/WHEEL +0 -0
  79. {lionagi-0.16.1.dist-info → lionagi-0.16.3.dist-info}/licenses/LICENSE +0 -0
lionagi/adapters/_utils.py CHANGED
@@ -10,17 +10,3 @@ def check_async_postgres_available():
10
10
  "This adapter requires postgres option to be installed. "
11
11
  'Please install them using `uv pip install "lionagi[postgres]"`.'
12
12
  )
13
-
14
-
15
- def check_postgres_available():
16
- try:
17
- from pydapter.model_adapters.postgres_model import PostgresModelAdapter
18
- from sqlalchemy import String
19
- from sqlalchemy.orm import DeclarativeBase
20
-
21
- return True
22
- except Exception:
23
- return ImportError(
24
- "This adapter requires postgres option to be installed. "
25
- 'Please install them using `uv pip install "lionagi[postgres]"`.'
26
- )
lionagi/libs/file/save.py CHANGED
@@ -89,7 +89,14 @@ def save_chunks(
89
89
  random_hash_digits=random_hash_digits,
90
90
  )
91
91
  save_to_file(
92
- ln.json_dumps(chunk),
92
+ ln.json_dumps(
93
+ chunk,
94
+ pretty=True,
95
+ sort_keys=True,
96
+ append_newline=True,
97
+ deterministic_sets=True,
98
+ decimal_as_float=True,
99
+ ),
93
100
  directory=file_path.parent,
94
101
  filename=file_path.name,
95
102
  verbose=verbose,
lionagi/ln/__init__.py CHANGED
@@ -4,7 +4,10 @@ from ._json_dump import (
4
4
  DEFAULT_SERIALIZER,
5
5
  DEFAULT_SERIALIZER_OPTION,
6
6
  get_orjson_default,
7
+ json_dumpb,
7
8
  json_dumps,
9
+ json_lines_iter,
10
+ make_options,
8
11
  )
9
12
  from ._list_call import lcall
10
13
  from ._to_list import to_list
@@ -34,8 +37,10 @@ from .fuzzy import (
34
37
  extract_json,
35
38
  fuzzy_json,
36
39
  fuzzy_match_keys,
40
+ fuzzy_validate_mapping,
37
41
  fuzzy_validate_pydantic,
38
42
  string_similarity,
43
+ to_dict,
39
44
  )
40
45
  from .types import is_sentinel, not_sentinel
41
46
 
@@ -47,6 +52,9 @@ __all__ = (
47
52
  "DEFAULT_SERIALIZER_OPTION",
48
53
  "get_orjson_default",
49
54
  "json_dumps",
55
+ "make_options",
56
+ "json_dumpb",
57
+ "json_lines_iter",
50
58
  "lcall",
51
59
  "to_list",
52
60
  "acreate_path",
@@ -74,4 +82,6 @@ __all__ = (
74
82
  "string_similarity",
75
83
  "is_sentinel",
76
84
  "not_sentinel",
85
+ "to_dict",
86
+ "fuzzy_validate_mapping",
77
87
  )
lionagi/ln/_json_dump.py CHANGED
@@ -1,75 +1,348 @@
1
+ from __future__ import annotations
2
+
1
3
  import datetime as dt
2
4
  import decimal
3
- from collections.abc import Callable
5
+ import re
6
+ from collections.abc import Callable, Iterable, Mapping
7
+ from enum import Enum
8
+ from functools import lru_cache
4
9
  from pathlib import Path
10
+ from textwrap import shorten
11
+ from typing import Any
5
12
  from uuid import UUID
6
13
 
7
14
  import orjson
8
15
 
16
+ __all__ = [
17
+ "get_orjson_default",
18
+ "DEFAULT_SERIALIZER",
19
+ "DEFAULT_SERIALIZER_OPTION",
20
+ "make_options",
21
+ "json_dumpb",
22
+ "json_dumps",
23
+ "json_lines_iter",
24
+ ]
25
+
26
+ # Types orjson already serializes natively at C/Rust speed.
27
+ # (We only route them through default() when passthrough is requested.)
28
+ _NATIVE = (dt.datetime, dt.date, dt.time, UUID)
29
+
30
+ # --------- helpers ------------------------------------------------------------
31
+
32
+ _ADDR_PAT = re.compile(r" at 0x[0-9A-Fa-f]+")
33
+
34
+
35
+ def _clip(s: str, limit: int = 2048) -> str:
36
+ return shorten(s, width=limit, placeholder=f"...(+{len(s) - limit} chars)") # type: ignore[arg-type]
37
+
38
+
39
+ def _normalize_for_sorting(x: Any) -> str:
40
+ """Normalize repr/str to remove process-specific addresses."""
41
+ s = str(x)
42
+ return _ADDR_PAT.sub(" at 0x?", s)
43
+
44
+
45
+ def _stable_sorted_iterable(o: Iterable[Any]) -> list[Any]:
46
+ """
47
+ Deterministic ordering for sets (including mixed types).
48
+ Key: (class name, normalized str) avoids comparisons across unlike types
49
+ and removes memory address variance in default reprs.
50
+ """
51
+ return sorted(
52
+ o, key=lambda x: (x.__class__.__name__, _normalize_for_sorting(x))
53
+ )
54
+
55
+
56
+ def _safe_exception_payload(ex: Exception) -> dict[str, str]:
57
+ return {"type": ex.__class__.__name__, "message": str(ex)}
9
58
 
10
- def _get_default_serializers():
11
- return {
12
- dt.datetime: lambda o: o.isoformat(),
13
- Path: lambda o: str(o),
14
- UUID: lambda o: str(o),
15
- decimal.Decimal: lambda o: str(o),
16
- set: lambda o: list(o),
17
- frozenset: lambda o: list(o),
59
+
60
+ def _default_serializers(
61
+ deterministic_sets: bool,
62
+ decimal_as_float: bool,
63
+ enum_as_name: bool,
64
+ passthrough_datetime: bool,
65
+ ) -> dict[type, Callable[[Any], Any]]:
66
+ ser: dict[type, Callable[[Any], Any]] = {
67
+ Path: str,
68
+ decimal.Decimal: (float if decimal_as_float else str),
69
+ set: (_stable_sorted_iterable if deterministic_sets else list),
70
+ frozenset: (_stable_sorted_iterable if deterministic_sets else list),
18
71
  }
72
+ if enum_as_name:
73
+ ser[Enum] = lambda e: e.name
74
+ # Only needed if you also set OPT_PASSTHROUGH_DATETIME via options.
75
+ if passthrough_datetime:
76
+ ser[dt.datetime] = lambda o: o.isoformat()
77
+ return ser
19
78
 
20
79
 
21
- def _get_default_serializer_order():
22
- return [dt.datetime, Path, UUID, decimal.Decimal, set, frozenset]
80
+ # --------- default() factory --------------------------------------------------
23
81
 
24
82
 
25
83
  def get_orjson_default(
26
- order: list[type] = None,
27
- additional: dict[type, Callable] = None,
84
+ *,
85
+ order: list[type] | None = None,
86
+ additional: Mapping[type, Callable[[Any], Any]] | None = None,
28
87
  extend_default: bool = True,
29
- ) -> Callable:
30
- """get the default function for orjson.dumps
31
- Args:
32
- order: order of types to check. Defaults to None.
33
- additional: additional serializers
34
- extend_default: when order is provided, whether to extend the default order or replace it.
88
+ deterministic_sets: bool = False,
89
+ decimal_as_float: bool = False,
90
+ enum_as_name: bool = False,
91
+ passthrough_datetime: bool = False,
92
+ safe_fallback: bool = False,
93
+ fallback_clip: int = 2048,
94
+ ) -> Callable[[Any], Any]:
35
95
  """
36
- dict_ = _get_default_serializers()
37
- dict_.update(additional or {})
38
- order_ = _get_default_serializer_order()
96
+ Build a fast, extensible `default=` callable for orjson.dumps.
97
+
98
+ - deterministic_sets: sort set/frozenset deterministically (slower).
99
+ - decimal_as_float: serialize Decimal as float (faster/smaller; precision loss).
100
+ - enum_as_name: serialize Enum as .name (else orjson uses .value by default).
101
+ - passthrough_datetime: if True, also pass OPT_PASSTHROUGH_DATETIME in options.
102
+ - safe_fallback: if True, unknown objects never raise (for logs);
103
+ Exceptions become a tiny dict; all else becomes clipped repr(str).
104
+
105
+ 'order' and 'additional' preserve your override semantics.
106
+ """
107
+ ser = _default_serializers(
108
+ deterministic_sets=deterministic_sets,
109
+ decimal_as_float=decimal_as_float,
110
+ enum_as_name=enum_as_name,
111
+ passthrough_datetime=passthrough_datetime,
112
+ )
113
+ if additional:
114
+ ser.update(additional)
115
+
116
+ base_order: list[type] = [Path, decimal.Decimal, set, frozenset]
117
+ if enum_as_name:
118
+ base_order.insert(0, Enum)
119
+ if passthrough_datetime:
120
+ base_order.insert(0, dt.datetime)
39
121
 
40
122
  if order:
41
- if len(additional or {}) > 0 and extend_default:
42
- order_.extend([k for k in order if k not in order_])
43
- else:
44
- order_ = list(order)
123
+ order_ = (
124
+ (base_order + [t for t in order if t not in base_order])
125
+ if extend_default
126
+ else list(order)
127
+ )
45
128
  else:
46
- if len(additional or {}) > 0:
47
- order_.extend([k for k in additional.keys() if k not in order_])
129
+ order_ = base_order.copy()
130
+
131
+ if not passthrough_datetime:
132
+ # Avoid checks for types already on the orjson native fast path.
133
+ order_ = [t for t in order_ if t not in _NATIVE]
134
+
135
+ order_tuple = tuple(order_)
136
+ cache: dict[type, Callable[[Any], Any]] = {}
48
137
 
49
- def default(obj):
50
- for t in order_:
51
- if isinstance(obj, t) and t in dict_:
52
- return dict_[t](obj)
53
- raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
138
+ def default(obj: Any) -> Any:
139
+ typ = obj.__class__
140
+ func = cache.get(typ)
141
+ if func is None:
142
+ for T in order_tuple:
143
+ if issubclass(typ, T):
144
+ f = ser.get(T)
145
+ if f:
146
+ cache[typ] = f
147
+ func = f
148
+ break
149
+ else:
150
+ # Duck-typed support for common data holders
151
+ md = getattr(obj, "model_dump", None)
152
+ if callable(md):
153
+ try:
154
+ return md()
155
+ except Exception:
156
+ pass
157
+ dd = getattr(obj, "dict", None)
158
+ if callable(dd):
159
+ try:
160
+ return dd()
161
+ except Exception:
162
+ pass
163
+ if safe_fallback:
164
+ if isinstance(obj, Exception):
165
+ return _safe_exception_payload(obj)
166
+ return _clip(repr(obj), fallback_clip)
167
+ raise TypeError(
168
+ f"Type is not JSON serializable: {typ.__name__}"
169
+ )
170
+ return func(obj)
54
171
 
55
172
  return default
56
173
 
57
174
 
175
+ @lru_cache(maxsize=128)
176
+ def _cached_default(
177
+ deterministic_sets: bool,
178
+ decimal_as_float: bool,
179
+ enum_as_name: bool,
180
+ passthrough_datetime: bool,
181
+ safe_fallback: bool,
182
+ fallback_clip: int,
183
+ ):
184
+ return get_orjson_default(
185
+ deterministic_sets=deterministic_sets,
186
+ decimal_as_float=decimal_as_float,
187
+ enum_as_name=enum_as_name,
188
+ passthrough_datetime=passthrough_datetime,
189
+ safe_fallback=safe_fallback,
190
+ fallback_clip=fallback_clip,
191
+ )
192
+
193
+
194
+ # --------- defaults & options -------------------------------------------------
195
+
196
+ # Compact, no newline, no sorting: neutral default for most use-cases.
197
+ DEFAULT_SERIALIZER_OPTION = 0
58
198
  DEFAULT_SERIALIZER = get_orjson_default()
59
- DEFAULT_SERIALIZER_OPTION = (
60
- orjson.OPT_INDENT_2
61
- | orjson.OPT_SORT_KEYS
62
- | orjson.OPT_APPEND_NEWLINE
63
- | orjson.OPT_SERIALIZE_DATACLASS
64
- )
65
-
66
-
67
- def json_dumps(d_, decode=True, /) -> str:
68
- by_ = orjson.dumps(
69
- d_,
70
- default=DEFAULT_SERIALIZER,
71
- option=DEFAULT_SERIALIZER_OPTION,
199
+
200
+
201
+ def make_options(
202
+ *,
203
+ pretty: bool = False,
204
+ sort_keys: bool = False,
205
+ naive_utc: bool = False,
206
+ utc_z: bool = False,
207
+ append_newline: bool = False,
208
+ passthrough_datetime: bool = False,
209
+ allow_non_str_keys: bool = False,
210
+ ) -> int:
211
+ """
212
+ Compose orjson 'option' bit flags succinctly.
213
+ """
214
+ opt = 0
215
+ if append_newline:
216
+ opt |= orjson.OPT_APPEND_NEWLINE
217
+ if pretty:
218
+ opt |= orjson.OPT_INDENT_2
219
+ if sort_keys:
220
+ opt |= orjson.OPT_SORT_KEYS
221
+ if naive_utc:
222
+ opt |= orjson.OPT_NAIVE_UTC
223
+ if utc_z:
224
+ opt |= orjson.OPT_UTC_Z
225
+ if passthrough_datetime:
226
+ opt |= orjson.OPT_PASSTHROUGH_DATETIME
227
+ if allow_non_str_keys:
228
+ opt |= orjson.OPT_NON_STR_KEYS
229
+ return opt
230
+
231
+
232
+ # --------- dump helpers -------------------------------------------------------
233
+
234
+
235
+ def json_dumpb(
236
+ obj: Any,
237
+ *,
238
+ pretty: bool = False,
239
+ sort_keys: bool = False,
240
+ naive_utc: bool = False,
241
+ utc_z: bool = False,
242
+ append_newline: bool = False,
243
+ allow_non_str_keys: bool = False,
244
+ deterministic_sets: bool = False,
245
+ decimal_as_float: bool = False,
246
+ enum_as_name: bool = False,
247
+ passthrough_datetime: bool = False,
248
+ safe_fallback: bool = False,
249
+ fallback_clip: int = 2048,
250
+ default: Callable[[Any], Any] | None = None,
251
+ options: int | None = None,
252
+ ) -> bytes:
253
+ """
254
+ Serialize to **bytes** (fast path). Prefer this in hot code.
255
+
256
+ Notes:
257
+ - If you set passthrough_datetime=True, you likely also want it in options.
258
+ - safe_fallback=True is recommended for LOGGING ONLY.
259
+ """
260
+ if default is None:
261
+ default = _cached_default(
262
+ deterministic_sets=deterministic_sets,
263
+ decimal_as_float=decimal_as_float,
264
+ enum_as_name=enum_as_name,
265
+ passthrough_datetime=passthrough_datetime,
266
+ safe_fallback=safe_fallback,
267
+ fallback_clip=fallback_clip,
268
+ )
269
+ opt = (
270
+ options
271
+ if options is not None
272
+ else make_options(
273
+ pretty=pretty,
274
+ sort_keys=sort_keys,
275
+ naive_utc=naive_utc,
276
+ utc_z=utc_z,
277
+ append_newline=append_newline,
278
+ passthrough_datetime=passthrough_datetime,
279
+ allow_non_str_keys=allow_non_str_keys,
280
+ )
72
281
  )
73
- if decode:
74
- return by_.decode("utf-8")
75
- return by_
282
+ return orjson.dumps(obj, default=default, option=opt)
283
+
284
+
285
+ def json_dumps(
286
+ obj: Any,
287
+ /,
288
+ *,
289
+ decode: bool = True,
290
+ **kwargs: Any,
291
+ ) -> str | bytes:
292
+ """
293
+ Serialize to str by default (decode=True), or bytes if decode=False.
294
+ """
295
+ out = json_dumpb(obj, **kwargs)
296
+ return out.decode("utf-8") if decode else out
297
+
298
+
299
+ # --------- streaming for very large outputs ----------------------------------
300
+
301
+
302
+ def json_lines_iter(
303
+ it: Iterable[Any],
304
+ *,
305
+ # default() configuration for each line
306
+ deterministic_sets: bool = False,
307
+ decimal_as_float: bool = False,
308
+ enum_as_name: bool = False,
309
+ passthrough_datetime: bool = False,
310
+ safe_fallback: bool = False,
311
+ fallback_clip: int = 2048,
312
+ # options
313
+ naive_utc: bool = False,
314
+ utc_z: bool = False,
315
+ allow_non_str_keys: bool = False,
316
+ # advanced
317
+ default: Callable[[Any], Any] | None = None,
318
+ options: int | None = None,
319
+ ) -> Iterable[bytes]:
320
+ """
321
+ Stream an iterable as **NDJSON** (one JSON object per line) in **bytes**.
322
+
323
+ Always ensures a trailing newline per line (OPT_APPEND_NEWLINE).
324
+ """
325
+ if default is None:
326
+ default = _cached_default(
327
+ deterministic_sets=deterministic_sets,
328
+ decimal_as_float=decimal_as_float,
329
+ enum_as_name=enum_as_name,
330
+ passthrough_datetime=passthrough_datetime,
331
+ safe_fallback=safe_fallback,
332
+ fallback_clip=fallback_clip,
333
+ )
334
+ if options is None:
335
+ opt = make_options(
336
+ pretty=False,
337
+ sort_keys=False,
338
+ naive_utc=naive_utc,
339
+ utc_z=utc_z,
340
+ append_newline=True, # enforce newline for NDJSON
341
+ passthrough_datetime=passthrough_datetime,
342
+ allow_non_str_keys=allow_non_str_keys,
343
+ )
344
+ else:
345
+ opt = options | orjson.OPT_APPEND_NEWLINE
346
+
347
+ for item in it:
348
+ yield orjson.dumps(item, default=default, option=opt)
lionagi/ln/fuzzy/__init__.py CHANGED
@@ -1,10 +1,12 @@
1
1
  from ._extract_json import extract_json
2
2
  from ._fuzzy_json import fuzzy_json
3
3
  from ._fuzzy_match import FuzzyMatchKeysParams, fuzzy_match_keys
4
- from ._fuzzy_validate import fuzzy_validate_pydantic
4
+ from ._fuzzy_validate import fuzzy_validate_mapping, fuzzy_validate_pydantic
5
5
  from ._string_similarity import SIMILARITY_TYPE, string_similarity
6
+ from ._to_dict import to_dict
6
7
 
7
8
  __all__ = (
9
+ "to_dict",
8
10
  "fuzzy_json",
9
11
  "fuzzy_match_keys",
10
12
  "extract_json",
@@ -12,4 +14,5 @@ __all__ = (
12
14
  "SIMILARITY_TYPE",
13
15
  "fuzzy_validate_pydantic",
14
16
  "FuzzyMatchKeysParams",
17
+ "fuzzy_validate_mapping",
15
18
  )
lionagi/ln/fuzzy/_fuzzy_validate.py CHANGED
@@ -1,9 +1,15 @@
1
+ from collections.abc import Callable, Sequence
2
+ from typing import Any, Literal
3
+
1
4
  from pydantic import BaseModel
2
5
 
3
6
  from lionagi._errors import ValidationError
4
7
 
8
+ from ..types import KeysDict
5
9
  from ._extract_json import extract_json
6
10
  from ._fuzzy_match import FuzzyMatchKeysParams, fuzzy_match_keys
11
+ from ._string_similarity import SIMILARITY_TYPE
12
+ from ._to_dict import to_dict
7
13
 
8
14
  __all__ = ("fuzzy_validate_pydantic",)
9
15
 
@@ -44,3 +50,106 @@ def fuzzy_validate_pydantic(
44
50
  return model_type.model_validate(model_data)
45
51
  except Exception as e:
46
52
  raise ValidationError(f"Validation failed: {e}") from e
53
+
54
+
55
+ def fuzzy_validate_mapping(
56
+ d: Any,
57
+ keys: Sequence[str] | KeysDict,
58
+ /,
59
+ *,
60
+ similarity_algo: (
61
+ SIMILARITY_TYPE | Callable[[str, str], float]
62
+ ) = "jaro_winkler",
63
+ similarity_threshold: float = 0.85,
64
+ fuzzy_match: bool = True,
65
+ handle_unmatched: Literal[
66
+ "ignore", "raise", "remove", "fill", "force"
67
+ ] = "ignore",
68
+ fill_value: Any = None,
69
+ fill_mapping: dict[str, Any] | None = None,
70
+ strict: bool = False,
71
+ suppress_conversion_errors: bool = False,
72
+ ) -> dict[str, Any]:
73
+ """
74
+ Validate and correct any input into a dictionary with expected keys.
75
+
76
+ Args:
77
+ d: Input to validate. Can be:
78
+ - Dictionary
79
+ - JSON string or markdown code block
80
+ - XML string
81
+ - Object with to_dict/model_dump method
82
+ - Any type convertible to dictionary
83
+ keys: List of expected keys or dictionary mapping keys to types.
84
+ similarity_algo: String similarity algorithm or custom function.
85
+ similarity_threshold: Minimum similarity score for fuzzy matching.
86
+ fuzzy_match: If True, use fuzzy matching for key correction.
87
+ handle_unmatched: How to handle unmatched keys:
88
+ - "ignore": Keep unmatched keys
89
+ - "raise": Raise error for unmatched keys
90
+ - "remove": Remove unmatched keys
91
+ - "fill": Fill missing keys with default values
92
+ - "force": Combine "fill" and "remove" behaviors
93
+ fill_value: Default value for filling unmatched keys.
94
+ fill_mapping: Dictionary mapping keys to default values.
95
+ strict: Raise error if any expected key is missing.
96
+ suppress_conversion_errors: Return empty dict on conversion errors.
97
+
98
+ Returns:
99
+ Validated and corrected dictionary.
100
+
101
+ Raises:
102
+ ValueError: If input cannot be converted or validation fails.
103
+ TypeError: If input types are invalid.
104
+ """
105
+ if d is None:
106
+ raise TypeError("Input cannot be None")
107
+
108
+ # Try converting to dictionary
109
+ try:
110
+ if isinstance(d, str):
111
+ # First try to_json for JSON strings and code blocks
112
+ try:
113
+ json_result = extract_json(
114
+ d, fuzzy_parse=True, return_one_if_single=True
115
+ )
116
+ dict_input = (
117
+ json_result[0]
118
+ if isinstance(json_result, list)
119
+ else json_result
120
+ )
121
+ except Exception:
122
+ dict_input = to_dict(
123
+ d, str_type="json", fuzzy_parse=True, suppress=True
124
+ )
125
+ else:
126
+ dict_input = to_dict(
127
+ d, use_model_dump=True, fuzzy_parse=True, suppress=True
128
+ )
129
+
130
+ if not isinstance(dict_input, dict):
131
+ if suppress_conversion_errors:
132
+ dict_input = {}
133
+ else:
134
+ raise ValueError(
135
+ f"Failed to convert input to dictionary: {type(dict_input)}"
136
+ )
137
+
138
+ except Exception as e:
139
+ if suppress_conversion_errors:
140
+ dict_input = {}
141
+ else:
142
+ raise ValueError(f"Failed to convert input to dictionary: {e}")
143
+
144
+ # Validate the dictionary
145
+ return fuzzy_match_keys(
146
+ dict_input,
147
+ keys,
148
+ similarity_algo=similarity_algo,
149
+ similarity_threshold=similarity_threshold,
150
+ fuzzy_match=fuzzy_match,
151
+ handle_unmatched=handle_unmatched,
152
+ fill_value=fill_value,
153
+ fill_mapping=fill_mapping,
154
+ strict=strict,
155
+ )