krons 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. kronos/__init__.py +0 -0
  2. kronos/core/__init__.py +145 -0
  3. kronos/core/broadcaster.py +116 -0
  4. kronos/core/element.py +225 -0
  5. kronos/core/event.py +316 -0
  6. kronos/core/eventbus.py +116 -0
  7. kronos/core/flow.py +356 -0
  8. kronos/core/graph.py +442 -0
  9. kronos/core/node.py +982 -0
  10. kronos/core/pile.py +575 -0
  11. kronos/core/processor.py +494 -0
  12. kronos/core/progression.py +296 -0
  13. kronos/enforcement/__init__.py +57 -0
  14. kronos/enforcement/common/__init__.py +34 -0
  15. kronos/enforcement/common/boolean.py +85 -0
  16. kronos/enforcement/common/choice.py +97 -0
  17. kronos/enforcement/common/mapping.py +118 -0
  18. kronos/enforcement/common/model.py +102 -0
  19. kronos/enforcement/common/number.py +98 -0
  20. kronos/enforcement/common/string.py +140 -0
  21. kronos/enforcement/context.py +129 -0
  22. kronos/enforcement/policy.py +80 -0
  23. kronos/enforcement/registry.py +153 -0
  24. kronos/enforcement/rule.py +312 -0
  25. kronos/enforcement/service.py +370 -0
  26. kronos/enforcement/validator.py +198 -0
  27. kronos/errors.py +146 -0
  28. kronos/operations/__init__.py +32 -0
  29. kronos/operations/builder.py +228 -0
  30. kronos/operations/flow.py +398 -0
  31. kronos/operations/node.py +101 -0
  32. kronos/operations/registry.py +92 -0
  33. kronos/protocols.py +414 -0
  34. kronos/py.typed +0 -0
  35. kronos/services/__init__.py +81 -0
  36. kronos/services/backend.py +286 -0
  37. kronos/services/endpoint.py +608 -0
  38. kronos/services/hook.py +471 -0
  39. kronos/services/imodel.py +465 -0
  40. kronos/services/registry.py +115 -0
  41. kronos/services/utilities/__init__.py +36 -0
  42. kronos/services/utilities/header_factory.py +87 -0
  43. kronos/services/utilities/rate_limited_executor.py +271 -0
  44. kronos/services/utilities/rate_limiter.py +180 -0
  45. kronos/services/utilities/resilience.py +414 -0
  46. kronos/session/__init__.py +41 -0
  47. kronos/session/exchange.py +258 -0
  48. kronos/session/message.py +60 -0
  49. kronos/session/session.py +411 -0
  50. kronos/specs/__init__.py +25 -0
  51. kronos/specs/adapters/__init__.py +0 -0
  52. kronos/specs/adapters/_utils.py +45 -0
  53. kronos/specs/adapters/dataclass_field.py +246 -0
  54. kronos/specs/adapters/factory.py +56 -0
  55. kronos/specs/adapters/pydantic_adapter.py +309 -0
  56. kronos/specs/adapters/sql_ddl.py +946 -0
  57. kronos/specs/catalog/__init__.py +36 -0
  58. kronos/specs/catalog/_audit.py +39 -0
  59. kronos/specs/catalog/_common.py +43 -0
  60. kronos/specs/catalog/_content.py +59 -0
  61. kronos/specs/catalog/_enforcement.py +70 -0
  62. kronos/specs/factory.py +120 -0
  63. kronos/specs/operable.py +314 -0
  64. kronos/specs/phrase.py +405 -0
  65. kronos/specs/protocol.py +140 -0
  66. kronos/specs/spec.py +506 -0
  67. kronos/types/__init__.py +60 -0
  68. kronos/types/_sentinel.py +311 -0
  69. kronos/types/base.py +369 -0
  70. kronos/types/db_types.py +260 -0
  71. kronos/types/identity.py +66 -0
  72. kronos/utils/__init__.py +40 -0
  73. kronos/utils/_hash.py +234 -0
  74. kronos/utils/_json_dump.py +392 -0
  75. kronos/utils/_lazy_init.py +63 -0
  76. kronos/utils/_to_list.py +165 -0
  77. kronos/utils/_to_num.py +85 -0
  78. kronos/utils/_utils.py +375 -0
  79. kronos/utils/concurrency/__init__.py +205 -0
  80. kronos/utils/concurrency/_async_call.py +333 -0
  81. kronos/utils/concurrency/_cancel.py +122 -0
  82. kronos/utils/concurrency/_errors.py +96 -0
  83. kronos/utils/concurrency/_patterns.py +363 -0
  84. kronos/utils/concurrency/_primitives.py +328 -0
  85. kronos/utils/concurrency/_priority_queue.py +135 -0
  86. kronos/utils/concurrency/_resource_tracker.py +110 -0
  87. kronos/utils/concurrency/_run_async.py +67 -0
  88. kronos/utils/concurrency/_task.py +95 -0
  89. kronos/utils/concurrency/_utils.py +79 -0
  90. kronos/utils/fuzzy/__init__.py +14 -0
  91. kronos/utils/fuzzy/_extract_json.py +90 -0
  92. kronos/utils/fuzzy/_fuzzy_json.py +288 -0
  93. kronos/utils/fuzzy/_fuzzy_match.py +149 -0
  94. kronos/utils/fuzzy/_string_similarity.py +187 -0
  95. kronos/utils/fuzzy/_to_dict.py +396 -0
  96. kronos/utils/sql/__init__.py +13 -0
  97. kronos/utils/sql/_sql_validation.py +142 -0
  98. krons-0.1.0.dist-info/METADATA +70 -0
  99. krons-0.1.0.dist-info/RECORD +101 -0
  100. krons-0.1.0.dist-info/WHEEL +4 -0
  101. krons-0.1.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,288 @@
1
+ # Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from typing import Any
5
+
6
+ import orjson
7
+
8
+ # Security limit: Maximum input string size for JSON parsing.
9
+ # Large inputs can cause memory exhaustion during parsing and regex operations.
10
+ # Default: 10MB - generous for most use cases while preventing DoS.
11
+ MAX_JSON_INPUT_SIZE = 10 * 1024 * 1024 # 10 MB
12
+
13
+ __all__ = ("fuzzy_json",)
14
+
15
+
16
def fuzzy_json(
    str_to_parse: str,
    /,
    *,
    max_size: int = MAX_JSON_INPUT_SIZE,
) -> dict[str, Any] | list[dict[str, Any]]:
    """Parse a JSON string, repairing common formatting mistakes on the way.

    The text is tried as-is first. If orjson rejects it, the text is run
    through ``_clean_json_string_safe`` and retried; if that still fails,
    the cleaned text is passed to ``fix_json_string`` and tried one last time.

    Security Note:
        The input is size-checked before any parsing or repair work starts.

    Limitation:
        Meant for "nearly-valid" JSON such as LLM output. It is NOT a
        security boundary for adversarial input.

    Args:
        str_to_parse: JSON text to parse.
        max_size: Maximum accepted input size (default: 10MB).

    Returns:
        A dict, or a list whose elements are all dicts.

    Raises:
        TypeError: If the input is not a string, or the parsed value is
            neither a dict nor a list of dicts.
        ValueError: If the input is empty, too large, or no repair stage
            yields parseable JSON.
    """
    _check_valid_str(str_to_parse, max_size=max_size)

    # Each stage transforms the previous stage's text; later stages only run
    # when every earlier attempt failed to decode.
    repair_stages = (
        lambda text: text,  # direct parse
        _clean_json_string_safe,  # state-machine cleaning
        fix_json_string,  # bracket balancing
    )

    candidate = str_to_parse
    for repair in repair_stages:
        candidate = repair(candidate)
        try:
            parsed = orjson.loads(candidate)
        except orjson.JSONDecodeError:
            continue
        return _validate_return_type(parsed)

    # If all attempts fail
    raise ValueError("Invalid JSON string")
71
+
72
+
73
def _check_valid_str(
    str_to_parse: str,
    /,
    *,
    max_size: int = MAX_JSON_INPUT_SIZE,
) -> None:
    """Validate input string for JSON parsing.

    The size limit is enforced on the UTF-8 encoded length so that the
    documented "bytes" limit also holds for non-ASCII text, where one
    character occupies up to four bytes.

    Args:
        str_to_parse: Input string to validate
        max_size: Maximum allowed size in bytes

    Raises:
        TypeError: If input is not a string
        ValueError: If input is empty or exceeds size limit
    """
    if not isinstance(str_to_parse, str):
        raise TypeError("Input must be a string")
    if not str_to_parse.strip():
        raise ValueError("Input string is empty")

    size = len(str_to_parse)
    # UTF-8 uses 1-4 bytes per code point, so the character count decides the
    # outcome unless it falls in (max_size / 4, max_size]. Only then is the
    # text encoded, which also bounds the size of the temporary copy.
    # "surrogatepass" keeps lone surrogates from raising UnicodeEncodeError.
    if size <= max_size < size * 4 and not str_to_parse.isascii():
        size = len(str_to_parse.encode("utf-8", "surrogatepass"))

    if size > max_size:
        msg = (
            f"Input size ({size} bytes) exceeds maximum "
            f"({max_size} bytes). This limit prevents memory exhaustion."
        )
        raise ValueError(msg)
99
+
100
+
101
+ def _validate_return_type(result: Any) -> dict[str, Any] | list[dict[str, Any]]:
102
+ """Validate that parsed JSON matches the declared return type.
103
+
104
+ Args:
105
+ result: The result from orjson.loads()
106
+
107
+ Returns:
108
+ The validated result (dict or list[dict])
109
+
110
+ Raises:
111
+ TypeError: If result is a primitive type or list of non-dict elements
112
+ """
113
+ if isinstance(result, dict):
114
+ return result
115
+
116
+ if isinstance(result, list):
117
+ if not result: # Empty list valid (vacuous truth)
118
+ return result
119
+ for i, item in enumerate(result):
120
+ if not isinstance(item, dict):
121
+ raise TypeError(
122
+ f"fuzzy_json returns dict or list[dict], got list with "
123
+ f"non-dict element at index {i}: {type(item).__name__}"
124
+ )
125
+ return result
126
+
127
+ raise TypeError(
128
+ f"fuzzy_json returns dict or list[dict], got primitive type: {type(result).__name__}"
129
+ )
130
+
131
+
132
+ def _clean_json_string_safe(s: str) -> str:
133
+ """State-machine JSON cleaner that preserves string content.
134
+
135
+ Fixes common LLM output issues without corrupting quoted content:
136
+ - Single quotes -> double quotes (with escaping)
137
+ - Trailing commas before ] or }
138
+ - Unquoted object keys
139
+ """
140
+ result: list[str] = []
141
+ pos = 0
142
+ length = len(s)
143
+
144
+ while pos < length:
145
+ char = s[pos]
146
+
147
+ if char == "'": # Convert single-quoted string to double-quoted
148
+ result.append('"')
149
+ pos += 1
150
+ while pos < length:
151
+ inner_char = s[pos]
152
+ if inner_char == "\\": # Escape sequence
153
+ if pos + 1 < length:
154
+ next_char = s[pos + 1]
155
+ if next_char == "'": # \' -> apostrophe
156
+ result.append("'")
157
+ pos += 2
158
+ continue
159
+ result.append(inner_char)
160
+ result.append(next_char)
161
+ pos += 2
162
+ continue
163
+ result.append(inner_char)
164
+ pos += 1
165
+ continue
166
+ if inner_char == "'": # End single-quoted string
167
+ result.append('"')
168
+ pos += 1
169
+ break
170
+ if inner_char == '"': # Escape inner double quotes
171
+ result.append('\\"')
172
+ pos += 1
173
+ continue
174
+ result.append(inner_char)
175
+ pos += 1
176
+ continue
177
+
178
+ if char == '"': # Pass through double-quoted strings
179
+ result.append(char)
180
+ pos += 1
181
+ while pos < length:
182
+ inner_char = s[pos]
183
+ if inner_char == "\\": # Copy escape sequences
184
+ result.append(inner_char)
185
+ if pos + 1 < length:
186
+ pos += 1
187
+ result.append(s[pos])
188
+ pos += 1
189
+ continue
190
+ result.append(inner_char)
191
+ if inner_char == '"':
192
+ pos += 1
193
+ break
194
+ pos += 1
195
+ continue
196
+
197
+ if char in "{,": # Handle unquoted keys and trailing commas
198
+ if char == ",": # Check for trailing comma
199
+ lookahead = pos + 1
200
+ while lookahead < length and s[lookahead] in " \t\n\r":
201
+ lookahead += 1
202
+ if lookahead < length and s[lookahead] in "]}":
203
+ pos += 1 # Skip trailing comma
204
+ continue
205
+
206
+ result.append(char)
207
+ pos += 1
208
+
209
+ while pos < length and s[pos] in " \t\n\r": # Skip whitespace
210
+ result.append(s[pos])
211
+ pos += 1
212
+
213
+ if pos < length and s[pos] not in "\"'{[": # Check for unquoted key
214
+ key_start = pos
215
+ while pos < length and (s[pos].isalnum() or s[pos] == "_"):
216
+ pos += 1
217
+ if pos < length and key_start < pos:
218
+ key_end = pos
219
+ while pos < length and s[pos] in " \t\n\r":
220
+ pos += 1
221
+ if pos < length and s[pos] == ":":
222
+ key = s[key_start:key_end]
223
+ result.append(f'"{key}"')
224
+ continue
225
+ else:
226
+ pos = key_start # Not a key, restore
227
+ continue
228
+
229
+ result.append(char)
230
+ pos += 1
231
+
232
+ return "".join(result).strip()
233
+
234
+
235
def fix_json_string(str_to_parse: str, /) -> str:
    """Append whatever closing brackets a JSON string is missing.

    Brackets inside double-quoted strings are ignored, and a backslash
    always causes the character after it to be skipped.

    Args:
        str_to_parse: JSON text that may lack trailing ``}`` / ``]``.

    Returns:
        The input followed by the closers needed to balance every bracket
        still open at the end, innermost first.

    Raises:
        ValueError: If the input is empty, a closing bracket has no opener,
            or a closing bracket does not match the most recent opener.
    """
    if not str_to_parse:
        raise ValueError("Input string is empty")

    closer_for = {"{": "}", "[": "]"}
    closers = tuple(closer_for.values())
    pending: list[str] = []  # closers still owed, outermost first
    inside_string = False
    skip_next = False

    for symbol in str_to_parse:
        if skip_next:  # character following a backslash
            skip_next = False
            continue
        if symbol == "\\":
            skip_next = True
            continue
        if symbol == '"':  # toggles between string content and structure
            inside_string = not inside_string
            continue
        if inside_string:
            continue

        if symbol in closer_for:
            pending.append(closer_for[symbol])
        elif symbol in closers:
            if not pending:
                raise ValueError("Extra closing bracket found.")
            if pending[-1] != symbol:
                raise ValueError("Mismatched brackets.")
            pending.pop()

    if not pending:
        return str_to_parse
    return str_to_parse + "".join(pending[::-1])
@@ -0,0 +1,149 @@
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum
4
+ from typing import TYPE_CHECKING, Any, Literal
5
+
6
+ if TYPE_CHECKING:
7
+ from kronos.types import KeysLike
8
+
9
+ from kronos.types._sentinel import Unset
10
+
11
+ from ._string_similarity import SimilarityAlgo, string_similarity
12
+
13
+ __all__ = ("fuzzy_match_keys",)
14
+
15
# NOTE: a ``Literal`` alias with this same name used to be assigned just above
# the class; the class definition rebound the name immediately, so the alias
# was never observable and has been dropped.
class HandleUnmatched(Enum):
    """Strategy for handling unmatched keys in fuzzy_match_keys.

    Member values are the lowercase strategy names, so
    ``HandleUnmatched("ignore")`` yields ``HandleUnmatched.IGNORE``.
    """

    IGNORE = "ignore"
    """Keep original keys as-is."""

    RAISE = "raise"
    """Raise ValueError on unmatched keys."""

    REMOVE = "remove"
    """Remove unmatched original keys."""

    FILL = "fill"
    """Fill missing expected keys, keep unmatched original keys."""

    FORCE = "force"
    """Fill missing expected keys, remove unmatched original keys."""
35
+
36
+
37
def fuzzy_match_keys(
    d_: dict[str, Any],
    keys: KeysLike,
    /,
    *,
    similarity_algo: SimilarityAlgo = SimilarityAlgo.JARO_WINKLER,
    similarity_threshold: float = 0.85,
    fuzzy_match: bool = True,
    handle_unmatched: HandleUnmatched = HandleUnmatched.IGNORE,
    fill_value: Any = Unset,
    fill_mapping: dict[str, Any] | None = None,
    strict: bool = False,
) -> dict[str, Any]:
    """Validate and correct dict keys using fuzzy string matching.

    Matching runs in two passes: exact key matches first, then (optionally)
    fuzzy matches for the remaining input keys. Both passes walk the input
    in ``d_`` order and the expected keys in the order they were supplied,
    so the result does not depend on set iteration order.

    Args:
        d_: Input dictionary to validate.
        keys: Expected keys (list, tuple, or object with .keys()).
        similarity_algo: Algorithm for string similarity comparison.
        similarity_threshold: Minimum similarity score (0.0-1.0).
        fuzzy_match: Enable fuzzy matching for non-exact matches.
        handle_unmatched: Strategy for unmatched keys (see HandleUnmatched).
            The plain string value of a member (e.g. ``"ignore"``) is
            accepted as well.
        fill_value: Default value for missing expected keys.
        fill_mapping: Per-key values for specific missing keys.
        strict: Raise if any expected keys remain unmatched.

    Returns:
        Dictionary with keys corrected to match expected keys. When ``keys``
        is empty, a shallow copy of ``d_``.

    Raises:
        TypeError: If d_ is not dict or keys is None.
        ValueError: If threshold out of range, handle_unmatched is not a
            valid strategy, input keys are unmatched in RAISE mode, or
            expected keys are missing in strict mode.
    """
    # Input validation
    if not isinstance(d_, dict):
        raise TypeError("First argument must be a dictionary")
    if keys is None:
        raise TypeError("Keys argument cannot be None")
    if not 0.0 <= similarity_threshold <= 1.0:
        raise ValueError("similarity_threshold must be between 0.0 and 1.0")
    try:
        # Plain Enum members never compare equal to their string values, so a
        # string strategy must be converted before the comparisons below.
        mode = HandleUnmatched(handle_unmatched)
    except ValueError as exc:
        raise ValueError(f"Invalid handle_unmatched value: {handle_unmatched!r}") from exc

    # Extract expected keys, de-duplicated but in their original order
    if isinstance(keys, (list, tuple)) or not hasattr(keys, "keys"):
        expected_source = keys
    else:
        expected_source = keys.keys()
    expected = list(dict.fromkeys(expected_source))
    if not expected:
        return d_.copy()
    expected_lookup = set(expected)

    corrected_out: dict[str, Any] = {}
    matched_expected: set = set()
    matched_input: set = set()

    # Pass 1: exact matches
    for key, value in d_.items():
        if key in expected_lookup:
            corrected_out[key] = value
            matched_expected.add(key)
            matched_input.add(key)

    # Pass 2: fuzzy matching (greedy: each input key takes its best remaining
    # expected key, in input order)
    if fuzzy_match:
        remaining_expected = [k for k in expected if k not in matched_expected]
        for key, value in d_.items():
            if key in matched_input:
                continue
            if not remaining_expected:
                break

            found = string_similarity(
                key,
                remaining_expected,
                algorithm=similarity_algo,
                threshold=similarity_threshold,
                return_most_similar=True,
            )
            best = found if isinstance(found, str) else (found[0] if found else None)
            if best:
                corrected_out[best] = value
                matched_expected.add(best)
                matched_input.add(key)
                remaining_expected.remove(best)

    unmatched_input = [k for k in d_ if k not in matched_input]
    unmatched_expected = [k for k in expected if k not in matched_expected]

    if mode is HandleUnmatched.RAISE and unmatched_input:
        raise ValueError(f"Unmatched keys found: {set(unmatched_input)}")

    elif mode is HandleUnmatched.IGNORE:
        for key in unmatched_input:
            corrected_out[key] = d_[key]

    elif mode in (HandleUnmatched.FILL, HandleUnmatched.FORCE):
        for key in unmatched_expected:
            corrected_out[key] = (
                fill_mapping[key] if fill_mapping and key in fill_mapping else fill_value
            )

        if mode is HandleUnmatched.FILL:  # FORCE drops the leftovers instead
            for key in unmatched_input:
                corrected_out[key] = d_[key]

    # NOTE: strict is checked against the keys that were not *matched*, so it
    # still raises in FILL/FORCE mode even though those keys were filled.
    if strict and unmatched_expected:
        raise ValueError(f"Missing required keys: {set(unmatched_expected)}")

    return corrected_out
@@ -0,0 +1,187 @@
1
+ # Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """String similarity algorithms with rapidfuzz backend.
5
+
6
+ Provides multiple similarity metrics (Jaro-Winkler, Levenshtein, Hamming,
7
+ cosine, SequenceMatcher) for fuzzy string matching and correction.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from collections.abc import Callable
13
+ from dataclasses import dataclass
14
+ from enum import Enum
15
+ from typing import TYPE_CHECKING, Literal
16
+
17
+ import rapidfuzz.distance as _rf_distance
18
+
19
+ if TYPE_CHECKING:
20
+ from collections.abc import Sequence
21
+
22
+ __all__ = ("SimilarityAlgo", "string_similarity")
23
+
24
+
25
+ class SimilarityAlgo(Enum):
26
+ """String similarity algorithms with associated scoring functions."""
27
+
28
+ JARO_WINKLER = "jaro_winkler"
29
+ LEVENSHTEIN = "levenshtein"
30
+ SEQUENCE_MATCHER = "sequence_matcher"
31
+ HAMMING = "hamming"
32
+ COSINE = "cosine"
33
+
34
+ @classmethod
35
+ def from_potential_valid(cls, v: SimilarityAlgo | str) -> SimilarityAlgo:
36
+ """Convert string to enum, passing through existing enum values."""
37
+ if isinstance(v, cls):
38
+ return v
39
+ try:
40
+ return cls(v)
41
+ except ValueError as e:
42
+ raise ValueError(f"Invalid similarity algorithm: {v}") from e
43
+
44
+ @staticmethod
45
+ def cosine_similarity(s1: str, s2: str) -> float:
46
+ """Character-set cosine similarity. Returns 0.0 if either string empty."""
47
+ if not s1 or not s2:
48
+ return 0.0
49
+ set1, set2 = set(s1), set(s2)
50
+ return len(set1 & set2) / ((len(set1) * len(set2)) ** 0.5)
51
+
52
+ @staticmethod
53
+ def hamming_similarity(s1: str, s2: str) -> float:
54
+ """Positional match ratio. Requires equal-length strings (else 0.0)."""
55
+ if not s1 or not s2 or len(s1) != len(s2):
56
+ return 0.0
57
+ return sum(c1 == c2 for c1, c2 in zip(s1, s2, strict=False)) / len(s1)
58
+
59
+ @staticmethod
60
+ def jaro_winkler_similarity(s: str, t: str, scaling: float = 0.1) -> float:
61
+ """Jaro-Winkler similarity with prefix boost (via rapidfuzz).
62
+
63
+ Args:
64
+ s: First string.
65
+ t: Second string.
66
+ scaling: Prefix weight, must be in [0, 0.25].
67
+
68
+ Returns:
69
+ Similarity score in [0, 1].
70
+
71
+ Raises:
72
+ ValueError: If scaling not in [0, 0.25].
73
+ """
74
+ if not 0 <= scaling <= 0.25:
75
+ raise ValueError("Scaling factor must be between 0 and 0.25")
76
+ return _rf_distance.JaroWinkler.similarity(s, t, prefix_weight=scaling)
77
+
78
+ @staticmethod
79
+ def sequence_matcher_similarity(s1: str, s2: str) -> float:
80
+ """Similarity via difflib.SequenceMatcher (longest contiguous match)."""
81
+ from difflib import SequenceMatcher
82
+
83
+ return SequenceMatcher(None, s1, s2).ratio()
84
+
85
+
86
# String spellings of the SimilarityAlgo member values.
SIMILARITY_TYPE = Literal[
    "jaro_winkler",
    "levenshtein",
    "sequence_matcher",
    "hamming",
    "cosine",
]

# Type alias: (str, str) -> float similarity score.
SimilarityFunc = Callable[[str, str], float]
96
+
97
+
98
+ @dataclass(frozen=True, slots=True)
99
+ class MatchResult:
100
+ """String match with score and original index."""
101
+
102
+ word: str
103
+ score: float
104
+ index: int
105
+
106
+
107
def string_similarity(
    word: str,
    correct_words: Sequence[str],
    algorithm: SimilarityAlgo | str = SimilarityAlgo.JARO_WINKLER,
    threshold: float = 0.0,
    case_sensitive: bool = False,
    return_most_similar: bool = False,
) -> str | list[str] | None:
    """Find similar strings using specified similarity algorithm.

    Args:
        word: The input string to find matches for
        correct_words: List of strings to compare against
        algorithm: Similarity algorithm to use, as a SimilarityAlgo member or
            its string value (e.g. ``"levenshtein"``)
        threshold: Minimum similarity score (0.0 to 1.0)
        case_sensitive: Whether to consider case when matching
        return_most_similar: Return only the most similar match

    Returns:
        Matching string(s) in their original spelling, ordered by descending
        score and then by position in ``correct_words``; or None if no
        candidate reaches the threshold.

    Raises:
        ValueError: If correct_words is empty, threshold is out of range, or
            algorithm is not a known similarity algorithm
    """
    if not correct_words:
        raise ValueError("correct_words must not be empty")

    if not 0.0 <= threshold <= 1.0:
        raise ValueError("threshold must be between 0.0 and 1.0")

    # Accept both enum members and their string values.
    algorithm = SimilarityAlgo.from_potential_valid(algorithm)

    score_funcs: dict[SimilarityAlgo, Callable[[str, str], float]] = {
        SimilarityAlgo.JARO_WINKLER: SimilarityAlgo.jaro_winkler_similarity,
        # rapidfuzz's Levenshtein.similarity is an unnormalised count
        # (max - distance); the normalised variant is the one that lives on
        # the 0-1 scale `threshold` is expressed in.
        SimilarityAlgo.LEVENSHTEIN: _rf_distance.Levenshtein.normalized_similarity,
        SimilarityAlgo.SEQUENCE_MATCHER: SimilarityAlgo.sequence_matcher_similarity,
        SimilarityAlgo.HAMMING: SimilarityAlgo.hamming_similarity,
        SimilarityAlgo.COSINE: SimilarityAlgo.cosine_similarity,
    }
    score_func = score_funcs[algorithm]

    compare_word = str(word)
    original_words = [str(w) for w in correct_words]

    if case_sensitive:
        compare_words = original_words
    else:
        compare_word = compare_word.lower()
        compare_words = [w.lower() for w in original_words]

    results: list[MatchResult] = []
    for idx, (orig_word, comp_word) in enumerate(zip(original_words, compare_words, strict=False)):
        if algorithm is SimilarityAlgo.HAMMING and len(comp_word) != len(compare_word):
            # Hamming requires equal length; skipping (rather than scoring
            # 0.0) keeps such candidates out even when threshold is 0.0.
            continue
        score = score_func(compare_word, comp_word)
        if score >= threshold:
            results.append(MatchResult(orig_word, score, idx))

    if not results:
        return None

    results.sort(key=lambda x: (-x.score, x.index))  # Desc score, asc index

    # NOTE: only case-sensitive searches are narrowed to the top-scoring
    # candidates; case-insensitive searches return every candidate that met
    # the threshold.
    if case_sensitive:
        max_score = results[0].score
        results = [r for r in results if r.score == max_score]

    if return_most_similar:
        return results[0].word

    return [r.word for r in results]