lionherd-core 1.0.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. lionherd_core/__init__.py +84 -0
  2. lionherd_core/base/__init__.py +30 -0
  3. lionherd_core/base/_utils.py +295 -0
  4. lionherd_core/base/broadcaster.py +128 -0
  5. lionherd_core/base/element.py +300 -0
  6. lionherd_core/base/event.py +322 -0
  7. lionherd_core/base/eventbus.py +112 -0
  8. lionherd_core/base/flow.py +236 -0
  9. lionherd_core/base/graph.py +616 -0
  10. lionherd_core/base/node.py +212 -0
  11. lionherd_core/base/pile.py +811 -0
  12. lionherd_core/base/progression.py +261 -0
  13. lionherd_core/errors.py +104 -0
  14. lionherd_core/libs/__init__.py +2 -0
  15. lionherd_core/libs/concurrency/__init__.py +60 -0
  16. lionherd_core/libs/concurrency/_cancel.py +85 -0
  17. lionherd_core/libs/concurrency/_errors.py +80 -0
  18. lionherd_core/libs/concurrency/_patterns.py +238 -0
  19. lionherd_core/libs/concurrency/_primitives.py +253 -0
  20. lionherd_core/libs/concurrency/_priority_queue.py +135 -0
  21. lionherd_core/libs/concurrency/_resource_tracker.py +66 -0
  22. lionherd_core/libs/concurrency/_task.py +58 -0
  23. lionherd_core/libs/concurrency/_utils.py +61 -0
  24. lionherd_core/libs/schema_handlers/__init__.py +35 -0
  25. lionherd_core/libs/schema_handlers/_function_call_parser.py +122 -0
  26. lionherd_core/libs/schema_handlers/_minimal_yaml.py +88 -0
  27. lionherd_core/libs/schema_handlers/_schema_to_model.py +251 -0
  28. lionherd_core/libs/schema_handlers/_typescript.py +153 -0
  29. lionherd_core/libs/string_handlers/__init__.py +15 -0
  30. lionherd_core/libs/string_handlers/_extract_json.py +65 -0
  31. lionherd_core/libs/string_handlers/_fuzzy_json.py +103 -0
  32. lionherd_core/libs/string_handlers/_string_similarity.py +347 -0
  33. lionherd_core/libs/string_handlers/_to_num.py +63 -0
  34. lionherd_core/ln/__init__.py +45 -0
  35. lionherd_core/ln/_async_call.py +314 -0
  36. lionherd_core/ln/_fuzzy_match.py +166 -0
  37. lionherd_core/ln/_fuzzy_validate.py +151 -0
  38. lionherd_core/ln/_hash.py +141 -0
  39. lionherd_core/ln/_json_dump.py +347 -0
  40. lionherd_core/ln/_list_call.py +110 -0
  41. lionherd_core/ln/_to_dict.py +373 -0
  42. lionherd_core/ln/_to_list.py +190 -0
  43. lionherd_core/ln/_utils.py +156 -0
  44. lionherd_core/lndl/__init__.py +62 -0
  45. lionherd_core/lndl/errors.py +30 -0
  46. lionherd_core/lndl/fuzzy.py +321 -0
  47. lionherd_core/lndl/parser.py +427 -0
  48. lionherd_core/lndl/prompt.py +137 -0
  49. lionherd_core/lndl/resolver.py +323 -0
  50. lionherd_core/lndl/types.py +287 -0
  51. lionherd_core/protocols.py +181 -0
  52. lionherd_core/py.typed +0 -0
  53. lionherd_core/types/__init__.py +46 -0
  54. lionherd_core/types/_sentinel.py +131 -0
  55. lionherd_core/types/base.py +341 -0
  56. lionherd_core/types/operable.py +133 -0
  57. lionherd_core/types/spec.py +313 -0
  58. lionherd_core/types/spec_adapters/__init__.py +10 -0
  59. lionherd_core/types/spec_adapters/_protocol.py +125 -0
  60. lionherd_core/types/spec_adapters/pydantic_field.py +177 -0
  61. lionherd_core-1.0.0a3.dist-info/METADATA +502 -0
  62. lionherd_core-1.0.0a3.dist-info/RECORD +64 -0
  63. lionherd_core-1.0.0a3.dist-info/WHEEL +4 -0
  64. lionherd_core-1.0.0a3.dist-info/licenses/LICENSE +201 -0
lionherd_core/libs/string_handlers/_to_num.py
@@ -0,0 +1,63 @@
+ # Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
+ # SPDX-License-Identifier: Apache-2.0
+
+ from decimal import Decimal
+ from typing import Any
+
+
+ def to_num(
+     input_: Any,
+     /,
+     *,
+     upper_bound: int | float | None = None,
+     lower_bound: int | float | None = None,
+     num_type: type[int] | type[float] = float,
+     precision: int | None = None,
+ ) -> int | float:
+     """Convert input to numeric type with validation and bounds checking.
+
+     Args:
+         input_: Value to convert to number
+         upper_bound: Maximum allowed value (inclusive)
+         lower_bound: Minimum allowed value (inclusive)
+         num_type: Target numeric type (int or float)
+         precision: Number of decimal places for rounding (float only)
+
+     Returns:
+         Converted number
+
+     Raises:
+         ValueError: For invalid input or out-of-bounds values
+         TypeError: For unsupported input types
+     """
+     # Validate num_type
+     if num_type not in (int, float):
+         raise ValueError(f"Invalid number type: {num_type}")
+
+     # Numeric types (bool is an int subclass, so it is accepted here as well)
+     if isinstance(input_, (bool, int, float, Decimal)):
+         value = float(input_)
+     # Handle string input
+     elif isinstance(input_, str):
+         input_ = input_.strip()
+         if not input_:
+             raise ValueError("Empty string cannot be converted to number")
+         try:
+             value = float(input_)
+         except ValueError as e:
+             raise ValueError(f"Cannot convert '{input_}' to number") from e
+     else:
+         raise TypeError(f"Cannot convert {type(input_).__name__} to number")
+
+     # Apply bounds checking
+     if upper_bound is not None and value > upper_bound:
+         raise ValueError(f"Value {value} exceeds upper bound {upper_bound}")
+     if lower_bound is not None and value < lower_bound:
+         raise ValueError(f"Value {value} below lower bound {lower_bound}")
+
+     # Apply precision for float
+     if precision is not None and num_type is float:
+         value = round(value, precision)
+
+     # Convert to target type
+     return num_type(value)
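
A quick usage sketch of to_num as published above, importing from the module path shown in this diff; the expected outputs follow directly from the code:

    from lionherd_core.libs.string_handlers._to_num import to_num

    to_num("  42.5  ")                 # 42.5: whitespace is stripped before parsing
    to_num("3.14159", precision=2)     # 3.14: rounding applies to float targets
    to_num(7.9, num_type=int)          # 7: int(...) truncates after bounds checks
    to_num(True)                       # 1.0: bool passes the numeric isinstance check
    to_num("120", upper_bound=100)     # raises ValueError: exceeds upper bound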
lionherd_core/ln/__init__.py
@@ -0,0 +1,45 @@
+ # Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
+ # SPDX-License-Identifier: Apache-2.0
+
+ from ._async_call import AlcallParams, BcallParams, alcall, bcall
+ from ._fuzzy_match import FuzzyMatchKeysParams, fuzzy_match_keys
+ from ._fuzzy_validate import fuzzy_validate_mapping, fuzzy_validate_pydantic
+ from ._hash import hash_dict
+ from ._json_dump import (
+     get_orjson_default,
+     json_dict,
+     json_dumpb,
+     json_dumps,
+     json_lines_iter,
+     make_options,
+ )
+ from ._list_call import lcall
+ from ._to_dict import to_dict
+ from ._to_list import to_list
+ from ._utils import acreate_path, get_bins, import_module, is_import_installed, now_utc
+
+ __all__ = (
+     "AlcallParams",
+     "BcallParams",
+     "FuzzyMatchKeysParams",
+     "acreate_path",
+     "alcall",
+     "bcall",
+     "fuzzy_match_keys",
+     "fuzzy_validate_mapping",
+     "fuzzy_validate_pydantic",
+     "get_bins",
+     "get_orjson_default",
+     "hash_dict",
+     "import_module",
+     "is_import_installed",
+     "json_dict",
+     "json_dumpb",
+     "json_dumps",
+     "json_lines_iter",
+     "lcall",
+     "make_options",
+     "now_utc",
+     "to_dict",
+     "to_list",
+ )
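
These names form the flat ln namespace; a minimal import sketch, assuming the wheel is installed:

    from lionherd_core.ln import alcall, fuzzy_match_keys, to_dict, to_list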
lionherd_core/ln/_async_call.py
@@ -0,0 +1,314 @@
+ # Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
+ # SPDX-License-Identifier: Apache-2.0
+
+ import threading
+ from collections.abc import AsyncGenerator, Callable
+ from dataclasses import dataclass
+ from typing import Any, ClassVar, ParamSpec, TypeVar
+
+ from lionherd_core.libs.concurrency import (
+     Semaphore,
+     create_task_group,
+     get_cancelled_exc_class,
+     is_coro_func,
+     move_on_after,
+     non_cancel_subgroup,
+     run_sync,
+     sleep,
+ )
+ from lionherd_core.types import ModelConfig, Params, Unset, not_sentinel
+
+ from ._to_list import to_list
+
+ T = TypeVar("T")
+ P = ParamSpec("P")
+
+ _INITIALIZED = False
+ _MODEL_LIKE = None
+ _INIT_LOCK = threading.RLock()
+
+
+ __all__ = (
+     "AlcallParams",
+     "BcallParams",
+     "alcall",
+     "bcall",
+ )
+
+
+ async def alcall(
+     input_: list[Any],
+     func: Callable[..., T],
+     /,
+     *,
+     input_flatten: bool = False,
+     input_dropna: bool = False,
+     input_unique: bool = False,
+     input_flatten_tuple_set: bool = False,
+     output_flatten: bool = False,
+     output_dropna: bool = False,
+     output_unique: bool = False,
+     output_flatten_tuple_set: bool = False,
+     delay_before_start: float = 0,
+     retry_initial_delay: float = 0,
+     retry_backoff: float = 1,
+     retry_default: Any = Unset,
+     retry_timeout: float | None = None,
+     retry_attempts: int = 0,
+     max_concurrent: int | None = None,
+     throttle_period: float | None = None,
+     return_exceptions: bool = False,
+     **kwargs: Any,
+ ) -> list[T | BaseException]:
+     """Apply a function to each list element asynchronously with retry and concurrency control.
+
+     Args:
+         input_: List of items to process (or iterable that will be converted)
+         func: Callable to apply (sync or async)
+         input_flatten: Flatten nested input structures
+         input_dropna: Remove None/undefined from input
+         input_unique: Remove duplicate inputs (requires flatten)
+         input_flatten_tuple_set: Include tuples/sets in flattening
+         output_flatten: Flatten nested output structures
+         output_dropna: Remove None/undefined from output
+         output_unique: Remove duplicate outputs (requires flatten)
+         output_flatten_tuple_set: Include tuples/sets in output flattening
+         delay_before_start: Initial delay before processing (seconds)
+         retry_initial_delay: Initial retry delay (seconds)
+         retry_backoff: Backoff multiplier for retry delays
+         retry_default: Default value on retry exhaustion (Unset = raise)
+         retry_timeout: Timeout per function call (seconds)
+         retry_attempts: Maximum retry attempts (0 = no retry)
+         max_concurrent: Max concurrent executions (None = unlimited)
+         throttle_period: Delay between starting tasks (seconds)
+         return_exceptions: Return exceptions instead of raising
+         **kwargs: Additional arguments passed to func
+
+     Returns:
+         List of results (preserves input order, may include exceptions if return_exceptions=True)
+
+     Raises:
+         ValueError: If func is not callable
+         TimeoutError: If retry_timeout exceeded
+         ExceptionGroup: If return_exceptions=False and tasks raise
+     """
+     global _INITIALIZED, _MODEL_LIKE
+     if _INITIALIZED is False:
+         with _INIT_LOCK:
+             # Double-checked locking pattern: import pydantic lazily, exactly once
+             if _INITIALIZED is False:
+                 from pydantic import BaseModel
+
+                 _MODEL_LIKE = (BaseModel,)
+                 _INITIALIZED = True
+
+     # Validate func is a single callable
+     if not callable(func):
+         # func may be an iterable containing exactly one callable; try to extract it
+         try:
+             func_list = list(func)
+         except TypeError:
+             raise ValueError("func must be callable or an iterable containing one callable.")
+
+         # Ensure exactly one callable is present
+         if len(func_list) != 1 or not callable(func_list[0]):
+             raise ValueError("Only one callable function is allowed.")
+
+         func = func_list[0]
+
+     # Process input if requested
+     if any((input_flatten, input_dropna)):
+         input_ = to_list(
+             input_,
+             flatten=input_flatten,
+             dropna=input_dropna,
+             unique=input_unique,
+             flatten_tuple_set=input_flatten_tuple_set,
+         )
+     else:
+         if not isinstance(input_, list):
+             if isinstance(input_, _MODEL_LIKE):
+                 # A single Pydantic model: wrap it in a list
+                 input_ = [input_]
+             else:
+                 try:
+                     iter(input_)
+                     # Iterable (e.g., tuple or generator): materialize as a list
+                     input_ = list(input_)
+                 except TypeError:
+                     # Not iterable: wrap in a list
+                     input_ = [input_]
+
+     # Optional initial delay before processing
+     if delay_before_start:
+         await sleep(delay_before_start)
+
+     semaphore = Semaphore(max_concurrent) if max_concurrent else None
+     throttle_delay = throttle_period or 0
+     coro_func = is_coro_func(func)
+
+     async def call_func(item: Any) -> T:
+         if coro_func:
+             # Async function
+             if retry_timeout is not None:
+                 with move_on_after(retry_timeout) as cancel_scope:
+                     result = await func(item, **kwargs)
+                 if cancel_scope.cancelled_caught:
+                     raise TimeoutError(f"Function call timed out after {retry_timeout}s")
+                 return result
+             else:
+                 return await func(item, **kwargs)
+         else:
+             # Sync function
+             if retry_timeout is not None:
+                 with move_on_after(retry_timeout) as cancel_scope:
+                     result = await run_sync(func, item, **kwargs)
+                 if cancel_scope.cancelled_caught:
+                     raise TimeoutError(f"Function call timed out after {retry_timeout}s")
+                 return result
+             else:
+                 return await run_sync(func, item, **kwargs)
+
+     async def execute_task(i: Any, index: int) -> Any:
+         attempts = 0
+         current_delay = retry_initial_delay
+         while True:
+             try:
+                 result = await call_func(i)
+                 return index, result
+
+             # If cancelled, re-raise
+             except get_cancelled_exc_class():
+                 raise
+
+             # Handle other exceptions with retry and backoff
+             except Exception:
+                 attempts += 1
+                 if attempts <= retry_attempts:
+                     if current_delay:
+                         await sleep(current_delay)
+                     current_delay *= retry_backoff
+                     # Retry loop continues
+                 else:
+                     # Exhausted retries
+                     if not_sentinel(retry_default):
+                         return index, retry_default
+                     # No default, re-raise
+                     raise
+
+     # Preallocate the result list and fill by index: preserves order with no lock or sort
+     n_items = len(input_)
+     out: list[Any] = [None] * n_items
+
+     async def task_wrapper(item: Any, idx: int) -> None:
+         try:
+             if semaphore:
+                 async with semaphore:
+                     _, result = await execute_task(item, idx)
+             else:
+                 _, result = await execute_task(item, idx)
+             out[idx] = result
+         except BaseException as exc:
+             out[idx] = exc
+             if not return_exceptions:
+                 raise  # Propagate to the task group
+
+     # Execute all tasks using a task group
+     try:
+         async with create_task_group() as tg:
+             for idx, item in enumerate(input_):
+                 tg.start_soon(task_wrapper, item, idx)
+                 # Apply throttle delay between starting tasks
+                 if throttle_delay and idx < n_items - 1:
+                     await sleep(throttle_delay)
+     except ExceptionGroup as eg:
+         if not return_exceptions:
+             # Surface only the non-cancellation subgroup to preserve structure & tracebacks
+             rest = non_cancel_subgroup(eg)
+             if rest is not None:
+                 raise rest
+         raise
+
+     output_list = out  # already in original order
+     return to_list(
+         output_list,
+         flatten=output_flatten,
+         dropna=output_dropna,
+         unique=output_unique,
+         flatten_tuple_set=output_flatten_tuple_set,
+     )
+
+
+ async def bcall(
+     input_: list[Any],
+     func: Callable[..., T],
+     /,
+     batch_size: int,
+     **kwargs: Any,
+ ) -> AsyncGenerator[list[T | BaseException], None]:
+     """Process input in batches using alcall, yielding results batch by batch.
+
+     Args:
+         input_: Items to process
+         func: Callable to apply
+         batch_size: Number of items per batch
+         **kwargs: Arguments passed to alcall (see alcall for details)
+
+     Yields:
+         List of results for each batch
+     """
+     input_ = to_list(input_, flatten=True, dropna=True)
+
+     for i in range(0, len(input_), batch_size):
+         batch = input_[i : i + batch_size]
+         yield await alcall(batch, func, **kwargs)
+
+
+ @dataclass(slots=True, init=False, frozen=True)
+ class AlcallParams(Params):
+     # ClassVar attributes
+     _config: ClassVar[ModelConfig] = ModelConfig(none_as_sentinel=True)
+     _func: ClassVar[Any] = alcall
+
+     # Input processing
+     input_flatten: bool
+     input_dropna: bool
+     input_unique: bool
+     input_flatten_tuple_set: bool
+
+     # Output processing
+     output_flatten: bool
+     output_dropna: bool
+     output_unique: bool
+     output_flatten_tuple_set: bool
+
+     # Retry and timeout
+     delay_before_start: float
+     retry_initial_delay: float
+     retry_backoff: float
+     retry_default: Any
+     retry_timeout: float
+     retry_attempts: int
+
+     # Concurrency and throttling
+     max_concurrent: int
+     throttle_period: float
+
+     kw: dict[str, Any] = Unset
+
+     async def __call__(self, input_: list[Any], func: Callable[..., T], **kw: Any) -> list[T]:
+         kwargs = {**self.default_kw(), **kw}
+         return await alcall(input_, func, **kwargs)
+
+
+ @dataclass(slots=True, init=False, frozen=True)
+ class BcallParams(AlcallParams):
+     _func: ClassVar[Any] = bcall
+
+     batch_size: int
+
+     async def __call__(self, input_: list[Any], func: Callable[..., T], **kw: Any) -> list[T]:
+         kwargs = {**self.default_kw(), **kw}
+         # bcall is an async generator, so drain it into a flat list
+         # rather than awaiting it directly (awaiting would raise TypeError)
+         results: list[T] = []
+         async for batch in bcall(input_, func, self.batch_size, **kwargs):
+             results.extend(batch)
+         return results
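
A minimal usage sketch for alcall and bcall, assuming the concurrency layer runs on an asyncio backend; the demo function and values here are illustrative, not part of the package:

    import asyncio

    from lionherd_core.ln import alcall, bcall

    async def double(x: int) -> int:
        await asyncio.sleep(0.01)
        return x * 2

    async def main() -> None:
        # At most two calls in flight, one retry, 1s timeout per call.
        results = await alcall(
            [1, 2, 3, 4],
            double,
            max_concurrent=2,
            retry_attempts=1,
            retry_timeout=1.0,
        )
        print(results)  # [2, 4, 6, 8]: input order is preserved

        # bcall yields each batch's results as the batch completes.
        async for batch in bcall([1, 2, 3, 4, 5], double, batch_size=2):
            print(batch)  # [2, 4], then [6, 8], then [10]

    asyncio.run(main())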
lionherd_core/ln/_fuzzy_match.py
@@ -0,0 +1,166 @@
+ # Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
+ # SPDX-License-Identifier: Apache-2.0
+
+ from dataclasses import dataclass
+ from typing import Any, ClassVar, Literal
+
+ from ..libs.string_handlers._string_similarity import (
+     SIMILARITY_ALGO_MAP,
+     SIMILARITY_TYPE,
+     SimilarityAlgo,
+     SimilarityFunc,
+     string_similarity,
+ )
+ from ..types import KeysLike, ModelConfig, Params, Unset
+
+ __all__ = (
+     "FuzzyMatchKeysParams",
+     "fuzzy_match_keys",
+ )
+
+
+ HandleUnmatched = Literal["ignore", "raise", "remove", "fill", "force"]
+
+
+ def fuzzy_match_keys(
+     d_: dict[str, Any],
+     keys: KeysLike,
+     /,
+     *,
+     similarity_algo: SIMILARITY_TYPE | SimilarityAlgo | SimilarityFunc = "jaro_winkler",
+     similarity_threshold: float = 0.85,
+     fuzzy_match: bool = True,
+     handle_unmatched: HandleUnmatched = "ignore",
+     fill_value: Any = Unset,
+     fill_mapping: dict[str, Any] | None = None,
+     strict: bool = False,
+ ) -> dict[str, Any]:
+     """Validate and correct dict keys using fuzzy string matching.
+
+     Args:
+         d_: Input dictionary to validate
+         keys: Expected keys (list or dict-like with .keys())
+         similarity_algo: Algorithm for string similarity
+         similarity_threshold: Minimum similarity score (0.0-1.0)
+         fuzzy_match: Enable fuzzy matching for unmatched keys
+         handle_unmatched: How to handle unmatched keys ("ignore", "raise", "remove", "fill", "force")
+         fill_value: Default value for missing keys when filling
+         fill_mapping: Custom values for specific missing keys
+         strict: Raise if expected keys are missing
+
+     Returns:
+         Dictionary with corrected keys
+
+     Raises:
+         TypeError: If d_ is not a dict or keys is None
+         ValueError: If similarity_threshold is out of range, or unmatched keys are found in raise mode
+     """
+     # Input validation
+     if not isinstance(d_, dict):
+         raise TypeError("First argument must be a dictionary")
+     if keys is None:
+         raise TypeError("Keys argument cannot be None")
+     if not 0.0 <= similarity_threshold <= 1.0:
+         raise ValueError("similarity_threshold must be between 0.0 and 1.0")
+
+     # Extract expected keys
+     fields_set = set(keys) if isinstance(keys, list) else set(keys.keys())
+     if not fields_set:
+         return d_.copy()  # Return a copy of the original if there are no expected keys
+
+     # Initialize output dictionary and tracking sets
+     corrected_out = {}
+     matched_expected = set()
+     matched_input = set()
+
+     # Resolve the similarity function
+     if isinstance(similarity_algo, SimilarityAlgo):
+         similarity_func = SIMILARITY_ALGO_MAP[similarity_algo.value]
+     elif isinstance(similarity_algo, str):
+         if similarity_algo not in SIMILARITY_ALGO_MAP:
+             raise ValueError(f"Unknown similarity algorithm: {similarity_algo}")
+         similarity_func = SIMILARITY_ALGO_MAP[similarity_algo]
+     else:
+         similarity_func = similarity_algo
+
+     # First pass: exact matches
+     for key in d_:
+         if key in fields_set:
+             corrected_out[key] = d_[key]
+             matched_expected.add(key)
+             matched_input.add(key)
+
+     # Second pass: fuzzy matching if enabled
+     if fuzzy_match:
+         remaining_input = set(d_.keys()) - matched_input
+         remaining_expected = fields_set - matched_expected
+
+         for key in remaining_input:
+             if not remaining_expected:
+                 break
+
+             match = string_similarity(
+                 key,
+                 list(remaining_expected),
+                 algorithm=similarity_func,
+                 threshold=similarity_threshold,
+                 return_most_similar=True,
+             )
+
+             if match:
+                 corrected_out[match] = d_[key]
+                 matched_expected.add(match)
+                 matched_input.add(key)
+                 remaining_expected.remove(match)
+             elif handle_unmatched == "ignore":
+                 corrected_out[key] = d_[key]
+
+     # Handle unmatched keys based on the handle_unmatched parameter
+     unmatched_input = set(d_.keys()) - matched_input
+     unmatched_expected = fields_set - matched_expected
+
+     if handle_unmatched == "raise" and unmatched_input:
+         raise ValueError(f"Unmatched keys found: {unmatched_input}")
+
+     elif handle_unmatched == "ignore":
+         for key in unmatched_input:
+             corrected_out[key] = d_[key]
+
+     elif handle_unmatched in ("fill", "force"):
+         # Fill missing expected keys
+         for key in unmatched_expected:
+             if fill_mapping and key in fill_mapping:
+                 corrected_out[key] = fill_mapping[key]
+             else:
+                 corrected_out[key] = fill_value
+
+         # "fill" mode also keeps unmatched original keys
+         if handle_unmatched == "fill":
+             for key in unmatched_input:
+                 corrected_out[key] = d_[key]
+
+     # Check strict mode
+     if strict and unmatched_expected:
+         raise ValueError(f"Missing required keys: {unmatched_expected}")
+
+     return corrected_out
+
+
+ @dataclass(slots=True, init=False, frozen=True)
+ class FuzzyMatchKeysParams(Params):
+     _config: ClassVar[ModelConfig] = ModelConfig(none_as_sentinel=False)
+     _func: ClassVar[Any] = fuzzy_match_keys
+
+     similarity_algo: SIMILARITY_TYPE | SimilarityAlgo | SimilarityFunc = "jaro_winkler"
+     similarity_threshold: float = 0.85
+
+     fuzzy_match: bool = True
+     handle_unmatched: HandleUnmatched = "ignore"
+
+     fill_value: Any = Unset
+     fill_mapping: dict[str, Any] | Any = Unset
+     strict: bool = False
+
+     def __call__(self, d_: dict[str, Any], keys: KeysLike) -> dict[str, Any]:
+         return fuzzy_match_keys(d_, keys, **self.default_kw())
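
A short sketch of fuzzy_match_keys on a typo-laden dict, assuming the default jaro_winkler scorer at the 0.85 threshold; key order in the result may vary:

    from lionherd_core.ln import fuzzy_match_keys

    raw = {"user_nmae": "Ada", "agee": 36, "note": "keep me"}

    # Near-miss keys are remapped to the expected names; with the default
    # handle_unmatched="ignore", unmatched input keys pass through untouched.
    fuzzy_match_keys(raw, ["user_name", "age"])
    # {"user_name": "Ada", "age": 36, "note": "keep me"}

    # "fill" adds missing expected keys while keeping unmatched input keys;
    # "remove" would drop "note" instead.
    fuzzy_match_keys(raw, ["user_name", "age", "email"],
                     handle_unmatched="fill", fill_value=None)
    # {"user_name": "Ada", "age": 36, "email": None, "note": "keep me"}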