krons-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. kronos/__init__.py +0 -0
  2. kronos/core/__init__.py +145 -0
  3. kronos/core/broadcaster.py +116 -0
  4. kronos/core/element.py +225 -0
  5. kronos/core/event.py +316 -0
  6. kronos/core/eventbus.py +116 -0
  7. kronos/core/flow.py +356 -0
  8. kronos/core/graph.py +442 -0
  9. kronos/core/node.py +982 -0
  10. kronos/core/pile.py +575 -0
  11. kronos/core/processor.py +494 -0
  12. kronos/core/progression.py +296 -0
  13. kronos/enforcement/__init__.py +57 -0
  14. kronos/enforcement/common/__init__.py +34 -0
  15. kronos/enforcement/common/boolean.py +85 -0
  16. kronos/enforcement/common/choice.py +97 -0
  17. kronos/enforcement/common/mapping.py +118 -0
  18. kronos/enforcement/common/model.py +102 -0
  19. kronos/enforcement/common/number.py +98 -0
  20. kronos/enforcement/common/string.py +140 -0
  21. kronos/enforcement/context.py +129 -0
  22. kronos/enforcement/policy.py +80 -0
  23. kronos/enforcement/registry.py +153 -0
  24. kronos/enforcement/rule.py +312 -0
  25. kronos/enforcement/service.py +370 -0
  26. kronos/enforcement/validator.py +198 -0
  27. kronos/errors.py +146 -0
  28. kronos/operations/__init__.py +32 -0
  29. kronos/operations/builder.py +228 -0
  30. kronos/operations/flow.py +398 -0
  31. kronos/operations/node.py +101 -0
  32. kronos/operations/registry.py +92 -0
  33. kronos/protocols.py +414 -0
  34. kronos/py.typed +0 -0
  35. kronos/services/__init__.py +81 -0
  36. kronos/services/backend.py +286 -0
  37. kronos/services/endpoint.py +608 -0
  38. kronos/services/hook.py +471 -0
  39. kronos/services/imodel.py +465 -0
  40. kronos/services/registry.py +115 -0
  41. kronos/services/utilities/__init__.py +36 -0
  42. kronos/services/utilities/header_factory.py +87 -0
  43. kronos/services/utilities/rate_limited_executor.py +271 -0
  44. kronos/services/utilities/rate_limiter.py +180 -0
  45. kronos/services/utilities/resilience.py +414 -0
  46. kronos/session/__init__.py +41 -0
  47. kronos/session/exchange.py +258 -0
  48. kronos/session/message.py +60 -0
  49. kronos/session/session.py +411 -0
  50. kronos/specs/__init__.py +25 -0
  51. kronos/specs/adapters/__init__.py +0 -0
  52. kronos/specs/adapters/_utils.py +45 -0
  53. kronos/specs/adapters/dataclass_field.py +246 -0
  54. kronos/specs/adapters/factory.py +56 -0
  55. kronos/specs/adapters/pydantic_adapter.py +309 -0
  56. kronos/specs/adapters/sql_ddl.py +946 -0
  57. kronos/specs/catalog/__init__.py +36 -0
  58. kronos/specs/catalog/_audit.py +39 -0
  59. kronos/specs/catalog/_common.py +43 -0
  60. kronos/specs/catalog/_content.py +59 -0
  61. kronos/specs/catalog/_enforcement.py +70 -0
  62. kronos/specs/factory.py +120 -0
  63. kronos/specs/operable.py +314 -0
  64. kronos/specs/phrase.py +405 -0
  65. kronos/specs/protocol.py +140 -0
  66. kronos/specs/spec.py +506 -0
  67. kronos/types/__init__.py +60 -0
  68. kronos/types/_sentinel.py +311 -0
  69. kronos/types/base.py +369 -0
  70. kronos/types/db_types.py +260 -0
  71. kronos/types/identity.py +66 -0
  72. kronos/utils/__init__.py +40 -0
  73. kronos/utils/_hash.py +234 -0
  74. kronos/utils/_json_dump.py +392 -0
  75. kronos/utils/_lazy_init.py +63 -0
  76. kronos/utils/_to_list.py +165 -0
  77. kronos/utils/_to_num.py +85 -0
  78. kronos/utils/_utils.py +375 -0
  79. kronos/utils/concurrency/__init__.py +205 -0
  80. kronos/utils/concurrency/_async_call.py +333 -0
  81. kronos/utils/concurrency/_cancel.py +122 -0
  82. kronos/utils/concurrency/_errors.py +96 -0
  83. kronos/utils/concurrency/_patterns.py +363 -0
  84. kronos/utils/concurrency/_primitives.py +328 -0
  85. kronos/utils/concurrency/_priority_queue.py +135 -0
  86. kronos/utils/concurrency/_resource_tracker.py +110 -0
  87. kronos/utils/concurrency/_run_async.py +67 -0
  88. kronos/utils/concurrency/_task.py +95 -0
  89. kronos/utils/concurrency/_utils.py +79 -0
  90. kronos/utils/fuzzy/__init__.py +14 -0
  91. kronos/utils/fuzzy/_extract_json.py +90 -0
  92. kronos/utils/fuzzy/_fuzzy_json.py +288 -0
  93. kronos/utils/fuzzy/_fuzzy_match.py +149 -0
  94. kronos/utils/fuzzy/_string_similarity.py +187 -0
  95. kronos/utils/fuzzy/_to_dict.py +396 -0
  96. kronos/utils/sql/__init__.py +13 -0
  97. kronos/utils/sql/_sql_validation.py +142 -0
  98. krons-0.1.0.dist-info/METADATA +70 -0
  99. krons-0.1.0.dist-info/RECORD +101 -0
  100. krons-0.1.0.dist-info/WHEEL +4 -0
  101. krons-0.1.0.dist-info/licenses/LICENSE +201 -0
kronos/utils/_json_dump.py
@@ -0,0 +1,392 @@
+ # Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
+ # SPDX-License-Identifier: Apache-2.0
+
+ """JSON serialization utilities built on orjson.
+
+ Provides flexible serialization with:
+ - Configurable type handling (Decimal, Enum, datetime, sets)
+ - Safe fallback mode for logging non-serializable objects
+ - NDJSON streaming for iterables
+ - Caching of default handlers for performance
+ """
+
+ from __future__ import annotations
+
+ import contextlib
+ import datetime as dt
+ import decimal
+ import re
+ from collections.abc import Callable, Iterable, Mapping
+ from enum import Enum
+ from functools import lru_cache
+ from pathlib import Path
+ from textwrap import shorten
+ from typing import Any
+ from uuid import UUID
+
+ import orjson
+
+ __all__ = (
+     "get_orjson_default",
+     "json_dumpb",
+     "json_dump",
+     "json_lines_iter",
+     "make_options",
+ )
+
+ # Types with native orjson support (skip in custom order)
+ _NATIVE = (dt.datetime, dt.date, dt.time, UUID)
+ _ADDR_PAT = re.compile(r" at 0x[0-9A-Fa-f]+")
+ _SERIALIZATION_METHODS = ("model_dump", "to_dict", "dict")
+
+
+ def get_orjson_default(
+     *,
+     order: list[type] | None = None,
+     additional: Mapping[type, Callable[[Any], Any]] | None = None,
+     extend_default: bool = True,
+     deterministic_sets: bool = False,
+     decimal_as_float: bool = False,
+     enum_as_name: bool = False,
+     passthrough_datetime: bool = False,
+     safe_fallback: bool = False,
+     fallback_clip: int = 2048,
+ ) -> Callable[[Any], Any]:
+     """Build a `default=` callable for orjson.dumps with type-based dispatch.
+
+     Args:
+         order: Custom type priority order (checked before defaults).
+         additional: Extra type->serializer mappings.
+         extend_default: Merge order with defaults (True) or replace (False).
+         deterministic_sets: Sort sets for reproducible output (slower).
+         decimal_as_float: Decimal->float (lossy but compact).
+         enum_as_name: Enum->name instead of value.
+         passthrough_datetime: Use custom datetime serialization.
+         safe_fallback: Never raise; clip repr for unknown types (for logging).
+         fallback_clip: Max chars for safe_fallback repr.
+
+     Returns:
+         Callable suitable for orjson.dumps(default=...).
+     """
+     ser = _default_serializers(
+         deterministic_sets=deterministic_sets,
+         decimal_as_float=decimal_as_float,
+         enum_as_name=enum_as_name,
+         passthrough_datetime=passthrough_datetime,
+     )
+     if additional:
+         ser.update(additional)
+
+     base_order: list[type] = [Path, decimal.Decimal, set, frozenset]
+     if enum_as_name:
+         base_order.insert(0, Enum)
+     if passthrough_datetime:
+         base_order.insert(0, dt.datetime)
+
+     if order:
+         order_ = (
+             (base_order + [t for t in order if t not in base_order])
+             if extend_default
+             else list(order)
+         )
+     else:
+         order_ = base_order.copy()
+
+     if not passthrough_datetime:  # Skip types on orjson's native fast path
+         order_ = [t for t in order_ if t not in _NATIVE]
+
+     order_tuple = tuple(order_)
+     cache: dict[type, Callable[[Any], Any]] = {}
+
+     def default(obj: Any) -> Any:
+         typ = obj.__class__
+         func = cache.get(typ)
+         if func is None:
+             for T in order_tuple:
+                 if issubclass(typ, T):
+                     f = ser.get(T)
+                     if f:
+                         cache[typ] = f
+                         func = f
+                         break
+             else:
+                 methods = _SERIALIZATION_METHODS
+                 for m in methods:
+                     md = getattr(obj, m, None)
+                     if callable(md):
+                         with contextlib.suppress(Exception):
+                             return md()
+
+                 if safe_fallback:
+                     if isinstance(obj, Exception):
+                         return {"type": obj.__class__.__name__, "message": str(obj)}
+
+                     return shorten(
+                         repr(obj),
+                         width=fallback_clip,
+                         placeholder=f"...(+{len(repr(obj)) - fallback_clip} chars)",
+                     )
+                 raise TypeError(f"Type is not JSON serializable: {typ.__name__}")
+         return func(obj)
+
+     return default
+
+
+ def make_options(
+     *,
+     pretty: bool = False,
+     sort_keys: bool = False,
+     naive_utc: bool = False,
+     utc_z: bool = False,
+     append_newline: bool = False,
+     passthrough_datetime: bool = False,
+     allow_non_str_keys: bool = False,
+ ) -> int:
+     """Compose orjson option bit flags.
+
+     Args:
+         pretty: Indent with 2 spaces (OPT_INDENT_2).
+         sort_keys: Alphabetical key ordering (OPT_SORT_KEYS).
+         naive_utc: Treat naive datetime as UTC (OPT_NAIVE_UTC).
+         utc_z: Use 'Z' suffix for UTC times (OPT_UTC_Z).
+         append_newline: Add trailing newline (OPT_APPEND_NEWLINE).
+         passthrough_datetime: Custom datetime handling (OPT_PASSTHROUGH_DATETIME).
+         allow_non_str_keys: Allow int/UUID keys (OPT_NON_STR_KEYS).
+
+     Returns:
+         Combined option flags for orjson.dumps(option=...).
+     """
+     opt = 0
+     if append_newline:
+         opt |= orjson.OPT_APPEND_NEWLINE
+     if pretty:
+         opt |= orjson.OPT_INDENT_2
+     if sort_keys:
+         opt |= orjson.OPT_SORT_KEYS
+     if naive_utc:
+         opt |= orjson.OPT_NAIVE_UTC
+     if utc_z:
+         opt |= orjson.OPT_UTC_Z
+     if passthrough_datetime:
+         opt |= orjson.OPT_PASSTHROUGH_DATETIME
+     if allow_non_str_keys:
+         opt |= orjson.OPT_NON_STR_KEYS
+     return opt
+
+
+ def json_dumpb(
+     obj: Any,
+     *,
+     pretty: bool = False,
+     sort_keys: bool = False,
+     naive_utc: bool = False,
+     utc_z: bool = False,
+     append_newline: bool = False,
+     allow_non_str_keys: bool = False,
+     deterministic_sets: bool = False,
+     decimal_as_float: bool = False,
+     enum_as_name: bool = False,
+     passthrough_datetime: bool = False,
+     safe_fallback: bool = False,
+     fallback_clip: int = 2048,
+     default: Callable[[Any], Any] | None = None,
+     options: int | None = None,
+ ) -> bytes:
+     """Serialize to bytes (fast path for hot code).
+
+     Args:
+         obj: Object to serialize.
+         pretty: Indent output.
+         sort_keys: Alphabetical key order.
+         naive_utc: Naive datetime as UTC.
+         utc_z: Use 'Z' for UTC.
+         append_newline: Trailing newline.
+         allow_non_str_keys: Allow non-string dict keys.
+         deterministic_sets: Sort sets.
+         decimal_as_float: Decimal as float.
+         enum_as_name: Enum as name.
+         passthrough_datetime: Custom datetime handling.
+         safe_fallback: Never raise (for logging only).
+         fallback_clip: Max repr chars in safe mode.
+         default: Custom default callable (overrides above).
+         options: Pre-composed option flags (overrides above).
+
+     Returns:
+         JSON as bytes.
+     """
+     if default is None:
+         default = _cached_default(
+             deterministic_sets=deterministic_sets,
+             decimal_as_float=decimal_as_float,
+             enum_as_name=enum_as_name,
+             passthrough_datetime=passthrough_datetime,
+             safe_fallback=safe_fallback,
+             fallback_clip=fallback_clip,
+         )
+     opt = (
+         options
+         if options is not None
+         else make_options(
+             pretty=pretty,
+             sort_keys=sort_keys,
+             naive_utc=naive_utc,
+             utc_z=utc_z,
+             append_newline=append_newline,
+             passthrough_datetime=passthrough_datetime,
+             allow_non_str_keys=allow_non_str_keys,
+         )
+     )
+     return orjson.dumps(obj, default=default, option=opt)
+
+
+ def json_dump(
+     obj: Any,
+     *,
+     sort_keys: bool = False,
+     deterministic_sets: bool = False,
+     decode: bool = False,
+     as_loaded: bool = False,
+     **kwargs: Any,
+ ) -> str | bytes | Any:
+     """Serialize to JSON with flexible output format.
+
+     Args:
+         obj: Object to serialize.
+         sort_keys: Alphabetical key order.
+         deterministic_sets: Sort sets.
+         decode: Return str instead of bytes.
+         as_loaded: Parse output back to dict/list (requires decode=True).
+         **kwargs: Passed to json_dumpb.
+
+     Returns:
+         bytes (default), str (decode=True), or dict/list (as_loaded=True).
+
+     Raises:
+         ValueError: If as_loaded=True without decode=True.
+     """
+     if not decode and as_loaded:
+         raise ValueError("as_loaded=True requires decode=True")
+
+     bytes_ = json_dumpb(
+         obj,
+         sort_keys=sort_keys,
+         deterministic_sets=deterministic_sets,
+         **kwargs,
+     )
+
+     if not decode:
+         return bytes_
+     return orjson.loads(bytes_) if as_loaded else bytes_.decode("utf-8")
+
+
+ def json_lines_iter(
+     it: Iterable[Any],
+     *,
+     deterministic_sets: bool = False,
+     decimal_as_float: bool = False,
+     enum_as_name: bool = False,
+     passthrough_datetime: bool = False,
+     safe_fallback: bool = False,
+     fallback_clip: int = 2048,
+     naive_utc: bool = False,
+     utc_z: bool = False,
+     allow_non_str_keys: bool = False,
+     default: Callable[[Any], Any] | None = None,
+     options: int | None = None,
+ ) -> Iterable[bytes]:
+     """Stream iterable as NDJSON (newline-delimited JSON bytes).
+
+     Each item serialized to one line with trailing newline. Suitable for
+     streaming large datasets or log output.
+
+     Args:
+         it: Iterable of objects to serialize.
+         deterministic_sets: Sort sets.
+         decimal_as_float: Decimal as float.
+         enum_as_name: Enum as name.
+         passthrough_datetime: Custom datetime handling.
+         safe_fallback: Never raise (for logging).
+         fallback_clip: Max repr chars in safe mode.
+         naive_utc: Naive datetime as UTC.
+         utc_z: Use 'Z' for UTC.
+         allow_non_str_keys: Allow non-string dict keys.
+         default: Custom default callable.
+         options: Pre-composed option flags (newline always added).
+
+     Yields:
+         JSON bytes for each item with trailing newline.
+     """
+     if default is None:
+         default = _cached_default(
+             deterministic_sets=deterministic_sets,
+             decimal_as_float=decimal_as_float,
+             enum_as_name=enum_as_name,
+             passthrough_datetime=passthrough_datetime,
+             safe_fallback=safe_fallback,
+             fallback_clip=fallback_clip,
+         )
+     if options is None:
+         opt = make_options(
+             pretty=False,
+             sort_keys=False,
+             naive_utc=naive_utc,
+             utc_z=utc_z,
+             append_newline=True,
+             passthrough_datetime=passthrough_datetime,
+             allow_non_str_keys=allow_non_str_keys,
+         )
+     else:
+         opt = options | orjson.OPT_APPEND_NEWLINE  # Always enforce newline
+
+     for item in it:
+         yield orjson.dumps(item, default=default, option=opt)
+
+
+ @lru_cache(maxsize=128)
+ def _cached_default(
+     deterministic_sets: bool,
+     decimal_as_float: bool,
+     enum_as_name: bool,
+     passthrough_datetime: bool,
+     safe_fallback: bool,
+     fallback_clip: int,
+ ) -> Callable[[Any], Any]:
+     """Cache default handlers to avoid repeated construction."""
+     return get_orjson_default(
+         deterministic_sets=deterministic_sets,
+         decimal_as_float=decimal_as_float,
+         enum_as_name=enum_as_name,
+         passthrough_datetime=passthrough_datetime,
+         safe_fallback=safe_fallback,
+         fallback_clip=fallback_clip,
+     )
+
+
+ def _default_serializers(
+     deterministic_sets: bool,
+     decimal_as_float: bool,
+     enum_as_name: bool,
+     passthrough_datetime: bool,
+ ) -> dict[type, Callable[[Any], Any]]:
+     """Build type->serializer mapping based on configuration."""
+
+     def normalize_for_sorting(x: Any) -> str:
+         """Strip memory addresses from repr for stable sorting."""
+         return _ADDR_PAT.sub(" at 0x?", str(x))
+
+     def stable_sorted_iterable(o: Iterable[Any]) -> list[Any]:
+         """Sort mixed-type iterables by (class_name, normalized_repr)."""
+         return sorted(o, key=lambda x: (x.__class__.__name__, normalize_for_sorting(x)))
+
+     ser: dict[type, Callable[[Any], Any]] = {
+         Path: str,
+         decimal.Decimal: (float if decimal_as_float else str),
+         set: (stable_sorted_iterable if deterministic_sets else list),
+         frozenset: (stable_sorted_iterable if deterministic_sets else list),
+     }
+     if enum_as_name:
+         ser[Enum] = lambda e: e.name
+
+     if passthrough_datetime:
+         ser[dt.datetime] = lambda o: o.isoformat()
+     return ser
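
Usage sketch for kronos/utils/_json_dump.py: a minimal example, not taken from the wheel, assuming orjson is installed and that the private module path shipped here is imported directly (the public re-exports in kronos.utils may differ); variable names such as payload are illustrative.

from decimal import Decimal

from kronos.utils._json_dump import json_dump, json_dumpb, json_lines_iter

payload = {"ids": {3, 1, 2}, "price": Decimal("19.99")}

# Bytes fast path: sorted keys plus deterministic (sorted) set output.
raw = json_dumpb(payload, sort_keys=True, deterministic_sets=True)

# Flexible output: str, or parsed back into plain dict/list structures.
text = json_dump(payload, sort_keys=True, decode=True)
loaded = json_dump(payload, sort_keys=True, decode=True, as_loaded=True)

# NDJSON streaming: one bytes line per item, trailing newline always appended.
for line in json_lines_iter([{"n": 1}, {"n": 2}], safe_fallback=True):
    print(line)  # b'{"n":1}\n', then b'{"n":2}\n'
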
kronos/utils/_lazy_init.py
@@ -0,0 +1,63 @@
+ # Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Thread-safe lazy initialization utility."""
+
+ import threading
+ from collections.abc import Callable
+
+ __all__ = ("LazyInit",)
+
+
+ class LazyInit:
+     """Thread-safe lazy initialization helper using double-checked locking.
+
+     Defers expensive imports/setup until first use. Guarantees init_func
+     runs exactly once even under concurrent access.
+
+     Example:
+         _lazy = LazyInit()
+         _MODEL_LIKE = None
+
+         def _do_init():
+             global _MODEL_LIKE
+             from pydantic import BaseModel
+             _MODEL_LIKE = (BaseModel,)
+
+         def my_function(x):
+             _lazy.ensure(_do_init)
+             # _MODEL_LIKE is now initialized
+
+     Attributes:
+         initialized: True after ensure() has completed successfully.
+     """
+
+     __slots__ = ("_initialized", "_lock")
+
+     def __init__(self) -> None:
+         """Create uninitialized LazyInit instance."""
+         self._initialized = False
+         self._lock = threading.RLock()
+
+     @property
+     def initialized(self) -> bool:
+         """Check if initialization has completed."""
+         return self._initialized
+
+     def ensure(self, init_func: Callable[[], None]) -> None:
+         """Execute init_func exactly once, thread-safely.
+
+         Uses double-checked locking: fast path (no lock) when already
+         initialized, lock acquisition only on first call.
+
+         Args:
+             init_func: Initialization function. Should be idempotent
+                 as a safety measure (though only called once).
+         """
+         if self._initialized:
+             return
+         with self._lock:
+             if self._initialized:
+                 return
+             init_func()
+             self._initialized = True
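
Usage sketch for kronos/utils/_lazy_init.py: a small example of the exactly-once guarantee, not from the wheel; it assumes the private module path is importable as shipped, and the counter is purely illustrative.

import threading

from kronos.utils._lazy_init import LazyInit

_lazy = LazyInit()
_calls = 0

def _expensive_init() -> None:
    global _calls
    _calls += 1  # stands in for deferred imports / heavy setup

# Several threads race on ensure(); the init function still runs only once.
threads = [threading.Thread(target=_lazy.ensure, args=(_expensive_init,)) for _ in range(8)]
for t in threads:
    t.start()
for t in threads:
    t.join()

assert _calls == 1
assert _lazy.initialized
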
kronos/utils/_to_list.py
@@ -0,0 +1,165 @@
+ # Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
+ # SPDX-License-Identifier: Apache-2.0
+
+ """List conversion utilities with flattening, deduplication, and NA handling."""
+
+ from __future__ import annotations
+
+ from collections.abc import Iterable, Mapping
+ from enum import Enum as _Enum
+ from typing import Any
+
+ from ._hash import compute_hash
+ from ._lazy_init import LazyInit
+
+ __all__ = ("to_list",)
+
+ _lazy = LazyInit()
+ _MODEL_LIKE = None
+ _MAP_LIKE = None
+ _SINGLETONE_TYPES = None
+ _SKIP_TYPE = None
+ _SKIP_TUPLE_SET = None
+ _BYTE_LIKE = (str, bytes, bytearray)
+ _TUPLE_SET = (tuple, set, frozenset)
+
+
+ def _do_init() -> None:
+     """Initialize lazy type constants (Pydantic, kron sentinels)."""
+     from pydantic import BaseModel
+     from pydantic_core import PydanticUndefinedType
+
+     from kronos.types import UndefinedType, UnsetType
+
+     global _MODEL_LIKE, _MAP_LIKE, _SINGLETONE_TYPES, _SKIP_TYPE, _SKIP_TUPLE_SET
+     _MODEL_LIKE = (BaseModel,)
+     _MAP_LIKE = (Mapping, *_MODEL_LIKE)
+     _SINGLETONE_TYPES = (UndefinedType, UnsetType, PydanticUndefinedType)
+     _SKIP_TYPE = (*_BYTE_LIKE, *_MAP_LIKE, _Enum)
+     _SKIP_TUPLE_SET = (*_SKIP_TYPE, *_TUPLE_SET)
+
+
+ def to_list(
+     input_: Any,
+     /,
+     *,
+     flatten: bool = False,
+     dropna: bool = False,
+     unique: bool = False,
+     use_values: bool = False,
+     flatten_tuple_set: bool = False,
+ ) -> list:
+     """Convert input to list with optional transformations.
+
+     Type handling:
+     - None/Undefined/Unset: returns []
+     - list: returned as-is (not copied)
+     - Enum class: list of members (or values if use_values=True)
+     - str/bytes/bytearray: wrapped as [input_] unless use_values=True
+     - Mapping: wrapped as [input_] unless use_values=True (extracts values)
+     - BaseModel: wrapped as [input_]
+     - Other iterables: converted via list()
+     - Non-iterables: wrapped as [input_]
+
+     Args:
+         input_: Value to convert.
+         flatten: Recursively flatten nested iterables.
+         dropna: Remove None and sentinel values (Undefined, Unset).
+         unique: Remove duplicates. Requires flatten=True.
+         use_values: Extract values from Enum classes and Mappings.
+         flatten_tuple_set: When flatten=True, also flatten tuples/sets/frozensets.
+
+     Returns:
+         Processed list.
+
+     Raises:
+         ValueError: unique=True without flatten=True, or unhashable non-mapping item.
+
+     Edge Cases:
+         - Nested lists: preserved unless flatten=True
+         - Unhashable items with unique=True: falls back to compute_hash for mappings
+         - Empty input: returns []
+     """
+     _lazy.ensure(_do_init)
+
+     def _process_list(
+         lst: list[Any],
+         flatten: bool,
+         dropna: bool,
+         skip_types: tuple[type, ...],
+     ) -> list[Any]:
+         """Recursively process list with flatten/dropna logic."""
+         result: list[Any] = []
+         for item in lst:
+             if dropna and (item is None or isinstance(item, _SINGLETONE_TYPES)):
+                 continue
+             is_iterable = isinstance(item, Iterable)
+             should_skip = isinstance(item, skip_types)
+             if is_iterable and not should_skip:
+                 item_list = list(item)
+                 if flatten:
+                     result.extend(_process_list(item_list, flatten, dropna, skip_types))
+                 else:
+                     result.append(_process_list(item_list, flatten, dropna, skip_types))
+             else:
+                 result.append(item)
+         return result
+
+     def _to_list_type(input_: Any, use_values: bool) -> list[Any]:
+         """Convert input to initial list based on type."""
+         if input_ is None or isinstance(input_, _SINGLETONE_TYPES):
+             return []
+         if isinstance(input_, list):
+             return input_
+         if isinstance(input_, type) and issubclass(input_, _Enum):
+             members = input_.__members__.values()
+             return [member.value for member in members] if use_values else list(members)
+         if isinstance(input_, _BYTE_LIKE):
+             return list(input_) if use_values else [input_]
+         if isinstance(input_, Mapping):
+             return list(input_.values()) if use_values and hasattr(input_, "values") else [input_]
+         if isinstance(input_, _MODEL_LIKE):
+             return [input_]
+         if isinstance(input_, Iterable) and not isinstance(input_, _BYTE_LIKE):
+             return list(input_)
+         return [input_]
+
+     if unique and not flatten:
+         raise ValueError("unique=True requires flatten=True")
+
+     initial_list = _to_list_type(input_, use_values=use_values)
+     skip_types: tuple[type, ...] = _SKIP_TYPE if flatten_tuple_set else _SKIP_TUPLE_SET
+     processed = _process_list(initial_list, flatten=flatten, dropna=dropna, skip_types=skip_types)
+
+     if unique:
+         seen = set()
+         out = []
+         use_hash_fallback = False
+         for i in processed:
+             try:
+                 if not use_hash_fallback and i not in seen:
+                     seen.add(i)
+                     out.append(i)
+             except TypeError:
+                 if not use_hash_fallback:
+                     # Restart with hash-based deduplication
+                     use_hash_fallback = True
+                     seen = set()
+                     out = []
+                     for j in processed:
+                         try:
+                             hash_value = hash(j)
+                         except TypeError:
+                             if _MAP_LIKE is not None and isinstance(j, _MAP_LIKE):
+                                 hash_value = compute_hash(j)
+                             else:
+                                 raise ValueError(
+                                     "Unhashable type encountered in list unique value processing."
+                                 )
+                         if hash_value not in seen:
+                             seen.add(hash_value)
+                             out.append(j)
+                     break
+         return out
+
+     return processed
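
Usage sketch for kronos/utils/_to_list.py: a few illustrative calls with expected results in comments, not from the wheel; it assumes kronos and its pydantic dependency are installed (to_list lazily imports them on first use) and imports the private module path directly.

from kronos.utils._to_list import to_list

to_list(None)                               # []
to_list("abc")                              # ["abc"]  (strings are wrapped, not exploded)
to_list({"a": 1, "b": 2}, use_values=True)  # [1, 2]
to_list([1, [2, [3, None]]], flatten=True, dropna=True)  # [1, 2, 3]
to_list([1, 1, [2, 2]], flatten=True, unique=True)       # [1, 2]
# to_list([1, 2], unique=True)  -> ValueError: unique=True requires flatten=True
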
kronos/utils/_to_num.py
@@ -0,0 +1,85 @@
+ # Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
+ # SPDX-License-Identifier: Apache-2.0
+
+ """Numeric conversion utilities with bounds checking and security limits."""
+
+ import math
+ from decimal import Decimal
+ from typing import Any
+
+ __all__ = ("to_num", "MAX_NUMBER_STRING_LENGTH")
+
+ MAX_NUMBER_STRING_LENGTH = 1000
+ """Max string length for numeric conversion (DoS protection)."""
+
+
+ def to_num(
+     input_: Any,
+     /,
+     *,
+     upper_bound: int | float | None = None,
+     lower_bound: int | float | None = None,
+     num_type: type[int] | type[float] = float,
+     precision: int | None = None,
+     allow_inf: bool = False,
+ ) -> int | float:
+     """Convert input to numeric type with bounds checking and validation.
+
+     Handles: bool, int, float, Decimal, str. Strings are stripped before parsing.
+
+     Args:
+         input_: Value to convert. Bools treated as int (True=1, False=0).
+         upper_bound: Maximum allowed value (inclusive).
+         lower_bound: Minimum allowed value (inclusive).
+         num_type: Target type, must be `int` or `float`.
+         precision: Decimal places for float rounding (ignored for int).
+         allow_inf: Permit infinity values. Default False.
+
+     Returns:
+         Converted numeric value of type `num_type`.
+
+     Raises:
+         ValueError: Empty/too-long string, out of bounds, NaN, inf (when disallowed).
+         TypeError: Unsupported input type.
+
+     Edge Cases:
+         - Empty string: raises ValueError
+         - Whitespace-only string: raises ValueError (stripped to empty)
+         - "inf"/"-inf": raises unless allow_inf=True
+         - "nan": always raises
+         - Decimal: converted via float (may lose precision)
+     """
+     if num_type not in (int, float):
+         raise ValueError(f"Invalid number type: {num_type}")
+
+     if isinstance(input_, (bool, int, float, Decimal)):
+         value = float(input_)
+     elif isinstance(input_, str):
+         input_ = input_.strip()
+         if not input_:
+             raise ValueError("Empty string cannot be converted to number")
+         if len(input_) > MAX_NUMBER_STRING_LENGTH:
+             msg = f"String length ({len(input_)}) exceeds maximum ({MAX_NUMBER_STRING_LENGTH})"
+             raise ValueError(msg)
+         try:
+             value = float(input_)
+         except ValueError as e:
+             raise ValueError(f"Cannot convert '{input_}' to number") from e
+     else:
+         raise TypeError(f"Cannot convert {type(input_).__name__} to number")
+
+     # NaN bypasses all comparisons; always reject
+     if math.isnan(value):
+         raise ValueError("NaN is not allowed")
+     if math.isinf(value) and not allow_inf:
+         raise ValueError("Infinity is not allowed (use allow_inf=True to permit)")
+
+     if upper_bound is not None and value > upper_bound:
+         raise ValueError(f"Value {value} exceeds upper bound {upper_bound}")
+     if lower_bound is not None and value < lower_bound:
+         raise ValueError(f"Value {value} below lower bound {lower_bound}")
+
+     if precision is not None and num_type is float:
+         value = round(value, precision)
+
+     return num_type(value)
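
Usage sketch for kronos/utils/_to_num.py: expected values shown in comments, not from the wheel; the import assumes the private module path as shipped.

from kronos.utils._to_num import to_num

to_num("  42.5 ")                # 42.5 (whitespace is stripped)
to_num("7", num_type=int)        # 7
to_num(3.14159, precision=2)     # 3.14
to_num("1e3", upper_bound=1000)  # 1000.0 (bounds are inclusive)
to_num("inf", allow_inf=True)    # inf
# to_num("nan")   -> ValueError: NaN is not allowed
# to_num([1, 2])  -> TypeError: Cannot convert list to number
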