krons 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kronos/__init__.py +0 -0
- kronos/core/__init__.py +145 -0
- kronos/core/broadcaster.py +116 -0
- kronos/core/element.py +225 -0
- kronos/core/event.py +316 -0
- kronos/core/eventbus.py +116 -0
- kronos/core/flow.py +356 -0
- kronos/core/graph.py +442 -0
- kronos/core/node.py +982 -0
- kronos/core/pile.py +575 -0
- kronos/core/processor.py +494 -0
- kronos/core/progression.py +296 -0
- kronos/enforcement/__init__.py +57 -0
- kronos/enforcement/common/__init__.py +34 -0
- kronos/enforcement/common/boolean.py +85 -0
- kronos/enforcement/common/choice.py +97 -0
- kronos/enforcement/common/mapping.py +118 -0
- kronos/enforcement/common/model.py +102 -0
- kronos/enforcement/common/number.py +98 -0
- kronos/enforcement/common/string.py +140 -0
- kronos/enforcement/context.py +129 -0
- kronos/enforcement/policy.py +80 -0
- kronos/enforcement/registry.py +153 -0
- kronos/enforcement/rule.py +312 -0
- kronos/enforcement/service.py +370 -0
- kronos/enforcement/validator.py +198 -0
- kronos/errors.py +146 -0
- kronos/operations/__init__.py +32 -0
- kronos/operations/builder.py +228 -0
- kronos/operations/flow.py +398 -0
- kronos/operations/node.py +101 -0
- kronos/operations/registry.py +92 -0
- kronos/protocols.py +414 -0
- kronos/py.typed +0 -0
- kronos/services/__init__.py +81 -0
- kronos/services/backend.py +286 -0
- kronos/services/endpoint.py +608 -0
- kronos/services/hook.py +471 -0
- kronos/services/imodel.py +465 -0
- kronos/services/registry.py +115 -0
- kronos/services/utilities/__init__.py +36 -0
- kronos/services/utilities/header_factory.py +87 -0
- kronos/services/utilities/rate_limited_executor.py +271 -0
- kronos/services/utilities/rate_limiter.py +180 -0
- kronos/services/utilities/resilience.py +414 -0
- kronos/session/__init__.py +41 -0
- kronos/session/exchange.py +258 -0
- kronos/session/message.py +60 -0
- kronos/session/session.py +411 -0
- kronos/specs/__init__.py +25 -0
- kronos/specs/adapters/__init__.py +0 -0
- kronos/specs/adapters/_utils.py +45 -0
- kronos/specs/adapters/dataclass_field.py +246 -0
- kronos/specs/adapters/factory.py +56 -0
- kronos/specs/adapters/pydantic_adapter.py +309 -0
- kronos/specs/adapters/sql_ddl.py +946 -0
- kronos/specs/catalog/__init__.py +36 -0
- kronos/specs/catalog/_audit.py +39 -0
- kronos/specs/catalog/_common.py +43 -0
- kronos/specs/catalog/_content.py +59 -0
- kronos/specs/catalog/_enforcement.py +70 -0
- kronos/specs/factory.py +120 -0
- kronos/specs/operable.py +314 -0
- kronos/specs/phrase.py +405 -0
- kronos/specs/protocol.py +140 -0
- kronos/specs/spec.py +506 -0
- kronos/types/__init__.py +60 -0
- kronos/types/_sentinel.py +311 -0
- kronos/types/base.py +369 -0
- kronos/types/db_types.py +260 -0
- kronos/types/identity.py +66 -0
- kronos/utils/__init__.py +40 -0
- kronos/utils/_hash.py +234 -0
- kronos/utils/_json_dump.py +392 -0
- kronos/utils/_lazy_init.py +63 -0
- kronos/utils/_to_list.py +165 -0
- kronos/utils/_to_num.py +85 -0
- kronos/utils/_utils.py +375 -0
- kronos/utils/concurrency/__init__.py +205 -0
- kronos/utils/concurrency/_async_call.py +333 -0
- kronos/utils/concurrency/_cancel.py +122 -0
- kronos/utils/concurrency/_errors.py +96 -0
- kronos/utils/concurrency/_patterns.py +363 -0
- kronos/utils/concurrency/_primitives.py +328 -0
- kronos/utils/concurrency/_priority_queue.py +135 -0
- kronos/utils/concurrency/_resource_tracker.py +110 -0
- kronos/utils/concurrency/_run_async.py +67 -0
- kronos/utils/concurrency/_task.py +95 -0
- kronos/utils/concurrency/_utils.py +79 -0
- kronos/utils/fuzzy/__init__.py +14 -0
- kronos/utils/fuzzy/_extract_json.py +90 -0
- kronos/utils/fuzzy/_fuzzy_json.py +288 -0
- kronos/utils/fuzzy/_fuzzy_match.py +149 -0
- kronos/utils/fuzzy/_string_similarity.py +187 -0
- kronos/utils/fuzzy/_to_dict.py +396 -0
- kronos/utils/sql/__init__.py +13 -0
- kronos/utils/sql/_sql_validation.py +142 -0
- krons-0.1.0.dist-info/METADATA +70 -0
- krons-0.1.0.dist-info/RECORD +101 -0
- krons-0.1.0.dist-info/WHEEL +4 -0
- krons-0.1.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
# Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""JSON serialization utilities built on orjson.
|
|
5
|
+
|
|
6
|
+
Provides flexible serialization with:
|
|
7
|
+
- Configurable type handling (Decimal, Enum, datetime, sets)
|
|
8
|
+
- Safe fallback mode for logging non-serializable objects
|
|
9
|
+
- NDJSON streaming for iterables
|
|
10
|
+
- Caching of default handlers for performance
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import contextlib
|
|
16
|
+
import datetime as dt
|
|
17
|
+
import decimal
|
|
18
|
+
import re
|
|
19
|
+
from collections.abc import Callable, Iterable, Mapping
|
|
20
|
+
from enum import Enum
|
|
21
|
+
from functools import lru_cache
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from textwrap import shorten
|
|
24
|
+
from typing import Any
|
|
25
|
+
from uuid import UUID
|
|
26
|
+
|
|
27
|
+
import orjson
|
|
28
|
+
|
|
29
|
+
__all__ = (
    "get_orjson_default",
    "json_dumpb",
    "json_dump",
    "json_lines_iter",
    "make_options",
)

# Types with native orjson support (skip in custom order)
_NATIVE = (dt.datetime, dt.date, dt.time, UUID)
# Masks CPython object addresses (" at 0x7f...") in reprs so sorted output
# stays stable across runs.
_ADDR_PAT = re.compile(r" at 0x[0-9A-Fa-f]+")
# Duck-typed serialization methods probed, in this order, on unknown objects.
_SERIALIZATION_METHODS = ("model_dump", "to_dict", "dict")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_orjson_default(
    *,
    order: list[type] | None = None,
    additional: Mapping[type, Callable[[Any], Any]] | None = None,
    extend_default: bool = True,
    deterministic_sets: bool = False,
    decimal_as_float: bool = False,
    enum_as_name: bool = False,
    passthrough_datetime: bool = False,
    safe_fallback: bool = False,
    fallback_clip: int = 2048,
) -> Callable[[Any], Any]:
    """Build a `default=` callable for orjson.dumps with type-based dispatch.

    Dispatch order: the configured type list first, then duck-typed
    serialization methods (model_dump/to_dict/dict), then the safe
    fallback (if enabled), otherwise TypeError.

    Args:
        order: Custom type priority order (checked before defaults).
        additional: Extra type->serializer mappings.
        extend_default: Merge order with defaults (True) or replace (False).
        deterministic_sets: Sort sets for reproducible output (slower).
        decimal_as_float: Decimal->float (lossy but compact).
        enum_as_name: Enum->name instead of value.
        passthrough_datetime: Use custom datetime serialization.
        safe_fallback: Never raise; clip repr for unknown types (for logging).
        fallback_clip: Max chars for safe_fallback repr.

    Returns:
        Callable suitable for orjson.dumps(default=...).
    """
    ser = _default_serializers(
        deterministic_sets=deterministic_sets,
        decimal_as_float=decimal_as_float,
        enum_as_name=enum_as_name,
        passthrough_datetime=passthrough_datetime,
    )
    if additional:
        ser.update(additional)

    base_order: list[type] = [Path, decimal.Decimal, set, frozenset]
    if enum_as_name:
        base_order.insert(0, Enum)
    if passthrough_datetime:
        base_order.insert(0, dt.datetime)

    if order:
        order_ = (
            (base_order + [t for t in order if t not in base_order])
            if extend_default
            else list(order)
        )
    else:
        order_ = base_order.copy()

    if not passthrough_datetime:  # Skip types on orjson's native fast path
        order_ = [t for t in order_ if t not in _NATIVE]

    order_tuple = tuple(order_)
    # Per-closure memo: concrete type -> resolved serializer. Unbounded, but
    # keyed on classes so it stays small in practice.
    cache: dict[type, Callable[[Any], Any]] = {}

    def default(obj: Any) -> Any:
        typ = obj.__class__
        func = cache.get(typ)
        if func is None:
            for T in order_tuple:
                if issubclass(typ, T):
                    f = ser.get(T)
                    if f:
                        cache[typ] = f
                        func = f
                        break
            else:
                # No registered type matched: probe duck-typed methods.
                # Results are intentionally NOT cached (instance-dependent).
                for m in _SERIALIZATION_METHODS:
                    md = getattr(obj, m, None)
                    if callable(md):
                        with contextlib.suppress(Exception):
                            return md()

                if safe_fallback:
                    if isinstance(obj, Exception):
                        return {"type": obj.__class__.__name__, "message": str(obj)}

                    # Compute repr once (the previous version computed it
                    # twice: once for shorten, once for the placeholder).
                    text = repr(obj)
                    return shorten(
                        text,
                        width=fallback_clip,
                        placeholder=f"...(+{len(text) - fallback_clip} chars)",
                    )
                raise TypeError(f"Type is not JSON serializable: {typ.__name__}")
        return func(obj)

    return default
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def make_options(
    *,
    pretty: bool = False,
    sort_keys: bool = False,
    naive_utc: bool = False,
    utc_z: bool = False,
    append_newline: bool = False,
    passthrough_datetime: bool = False,
    allow_non_str_keys: bool = False,
) -> int:
    """Compose orjson option bit flags.

    Args:
        pretty: Indent with 2 spaces (OPT_INDENT_2).
        sort_keys: Alphabetical key ordering (OPT_SORT_KEYS).
        naive_utc: Treat naive datetime as UTC (OPT_NAIVE_UTC).
        utc_z: Use 'Z' suffix for UTC times (OPT_UTC_Z).
        append_newline: Add trailing newline (OPT_APPEND_NEWLINE).
        passthrough_datetime: Custom datetime handling (OPT_PASSTHROUGH_DATETIME).
        allow_non_str_keys: Allow int/UUID keys (OPT_NON_STR_KEYS).

    Returns:
        Combined option flags for orjson.dumps(option=...).
    """
    # Pair each toggle with its flag and OR together the enabled ones.
    flag_table = (
        (append_newline, orjson.OPT_APPEND_NEWLINE),
        (pretty, orjson.OPT_INDENT_2),
        (sort_keys, orjson.OPT_SORT_KEYS),
        (naive_utc, orjson.OPT_NAIVE_UTC),
        (utc_z, orjson.OPT_UTC_Z),
        (passthrough_datetime, orjson.OPT_PASSTHROUGH_DATETIME),
        (allow_non_str_keys, orjson.OPT_NON_STR_KEYS),
    )
    combined = 0
    for enabled, flag in flag_table:
        if enabled:
            combined |= flag
    return combined
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def json_dumpb(
    obj: Any,
    *,
    pretty: bool = False,
    sort_keys: bool = False,
    naive_utc: bool = False,
    utc_z: bool = False,
    append_newline: bool = False,
    allow_non_str_keys: bool = False,
    deterministic_sets: bool = False,
    decimal_as_float: bool = False,
    enum_as_name: bool = False,
    passthrough_datetime: bool = False,
    safe_fallback: bool = False,
    fallback_clip: int = 2048,
    default: Callable[[Any], Any] | None = None,
    options: int | None = None,
) -> bytes:
    """Serialize to bytes (fast path for hot code).

    Args:
        obj: Object to serialize.
        pretty: Indent output.
        sort_keys: Alphabetical key order.
        naive_utc: Naive datetime as UTC.
        utc_z: Use 'Z' for UTC.
        append_newline: Trailing newline.
        allow_non_str_keys: Allow non-string dict keys.
        deterministic_sets: Sort sets.
        decimal_as_float: Decimal as float.
        enum_as_name: Enum as name.
        passthrough_datetime: Custom datetime handling.
        safe_fallback: Never raise (for logging only).
        fallback_clip: Max repr chars in safe mode.
        default: Custom default callable (overrides the handler flags above).
        options: Pre-composed option flags (overrides the option flags above).

    Returns:
        JSON as bytes.
    """
    # An explicitly supplied handler wins; otherwise fetch a cached one
    # keyed on the flag combination.
    if default is not None:
        handler = default
    else:
        handler = _cached_default(
            deterministic_sets=deterministic_sets,
            decimal_as_float=decimal_as_float,
            enum_as_name=enum_as_name,
            passthrough_datetime=passthrough_datetime,
            safe_fallback=safe_fallback,
            fallback_clip=fallback_clip,
        )

    # Same precedence for the option bits.
    if options is not None:
        opt = options
    else:
        opt = make_options(
            pretty=pretty,
            sort_keys=sort_keys,
            naive_utc=naive_utc,
            utc_z=utc_z,
            append_newline=append_newline,
            passthrough_datetime=passthrough_datetime,
            allow_non_str_keys=allow_non_str_keys,
        )

    return orjson.dumps(obj, default=handler, option=opt)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def json_dump(
    obj: Any,
    *,
    sort_keys: bool = False,
    deterministic_sets: bool = False,
    decode: bool = False,
    as_loaded: bool = False,
    **kwargs: Any,
) -> str | bytes | Any:
    """Serialize to JSON with flexible output format.

    Args:
        obj: Object to serialize.
        sort_keys: Alphabetical key order.
        deterministic_sets: Sort sets.
        decode: Return str instead of bytes.
        as_loaded: Parse output back to dict/list (requires decode=True).
        **kwargs: Passed to json_dumpb.

    Returns:
        bytes (default), str (decode=True), or dict/list (as_loaded=True).

    Raises:
        ValueError: If as_loaded=True without decode=True.
    """
    if as_loaded and not decode:
        raise ValueError("as_loaded=True requires decode=True")

    payload = json_dumpb(
        obj,
        sort_keys=sort_keys,
        deterministic_sets=deterministic_sets,
        **kwargs,
    )

    if decode:
        # Round-trip through orjson.loads normalizes to plain dict/list.
        return orjson.loads(payload) if as_loaded else payload.decode("utf-8")
    return payload
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def json_lines_iter(
    it: Iterable[Any],
    *,
    deterministic_sets: bool = False,
    decimal_as_float: bool = False,
    enum_as_name: bool = False,
    passthrough_datetime: bool = False,
    safe_fallback: bool = False,
    fallback_clip: int = 2048,
    naive_utc: bool = False,
    utc_z: bool = False,
    allow_non_str_keys: bool = False,
    default: Callable[[Any], Any] | None = None,
    options: int | None = None,
) -> Iterable[bytes]:
    """Stream iterable as NDJSON (newline-delimited JSON bytes).

    Each item is serialized to one line with a trailing newline. Suitable
    for streaming large datasets or log output.

    Args:
        it: Iterable of objects to serialize.
        deterministic_sets: Sort sets.
        decimal_as_float: Decimal as float.
        enum_as_name: Enum as name.
        passthrough_datetime: Custom datetime handling.
        safe_fallback: Never raise (for logging).
        fallback_clip: Max repr chars in safe mode.
        naive_utc: Naive datetime as UTC.
        utc_z: Use 'Z' for UTC.
        allow_non_str_keys: Allow non-string dict keys.
        default: Custom default callable.
        options: Pre-composed option flags (newline always added).

    Yields:
        JSON bytes for each item with trailing newline.
    """
    if default is None:
        default = _cached_default(
            deterministic_sets=deterministic_sets,
            decimal_as_float=decimal_as_float,
            enum_as_name=enum_as_name,
            passthrough_datetime=passthrough_datetime,
            safe_fallback=safe_fallback,
            fallback_clip=fallback_clip,
        )

    if options is not None:
        # NDJSON requires one record per line, so the newline bit is
        # forced on even for caller-supplied option sets.
        opt = options | orjson.OPT_APPEND_NEWLINE
    else:
        opt = make_options(
            naive_utc=naive_utc,
            utc_z=utc_z,
            append_newline=True,
            passthrough_datetime=passthrough_datetime,
            allow_non_str_keys=allow_non_str_keys,
        )

    dumps = orjson.dumps  # hoist attribute lookup out of the hot loop
    for record in it:
        yield dumps(record, default=default, option=opt)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
@lru_cache(maxsize=128)
def _cached_default(
    deterministic_sets: bool,
    decimal_as_float: bool,
    enum_as_name: bool,
    passthrough_datetime: bool,
    safe_fallback: bool,
    fallback_clip: int,
) -> Callable[[Any], Any]:
    """Cache default handlers to avoid repeated construction.

    Keyed on the full flag tuple; the bounded maxsize keeps the cache from
    growing without limit if callers vary fallback_clip freely.
    """
    return get_orjson_default(
        deterministic_sets=deterministic_sets,
        decimal_as_float=decimal_as_float,
        enum_as_name=enum_as_name,
        passthrough_datetime=passthrough_datetime,
        safe_fallback=safe_fallback,
        fallback_clip=fallback_clip,
    )
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _default_serializers(
|
|
366
|
+
deterministic_sets: bool,
|
|
367
|
+
decimal_as_float: bool,
|
|
368
|
+
enum_as_name: bool,
|
|
369
|
+
passthrough_datetime: bool,
|
|
370
|
+
) -> dict[type, Callable[[Any], Any]]:
|
|
371
|
+
"""Build type->serializer mapping based on configuration."""
|
|
372
|
+
|
|
373
|
+
def normalize_for_sorting(x: Any) -> str:
|
|
374
|
+
"""Strip memory addresses from repr for stable sorting."""
|
|
375
|
+
return _ADDR_PAT.sub(" at 0x?", str(x))
|
|
376
|
+
|
|
377
|
+
def stable_sorted_iterable(o: Iterable[Any]) -> list[Any]:
|
|
378
|
+
"""Sort mixed-type iterables by (class_name, normalized_repr)."""
|
|
379
|
+
return sorted(o, key=lambda x: (x.__class__.__name__, normalize_for_sorting(x)))
|
|
380
|
+
|
|
381
|
+
ser: dict[type, Callable[[Any], Any]] = {
|
|
382
|
+
Path: str,
|
|
383
|
+
decimal.Decimal: (float if decimal_as_float else str),
|
|
384
|
+
set: (stable_sorted_iterable if deterministic_sets else list),
|
|
385
|
+
frozenset: (stable_sorted_iterable if deterministic_sets else list),
|
|
386
|
+
}
|
|
387
|
+
if enum_as_name:
|
|
388
|
+
ser[Enum] = lambda e: e.name
|
|
389
|
+
|
|
390
|
+
if passthrough_datetime:
|
|
391
|
+
ser[dt.datetime] = lambda o: o.isoformat()
|
|
392
|
+
return ser
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Thread-safe lazy initialization utility."""
|
|
5
|
+
|
|
6
|
+
import threading
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
|
|
9
|
+
__all__ = ("LazyInit",)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LazyInit:
    """One-shot, thread-safe initialization gate (double-checked locking).

    Defers expensive imports/setup until first use while guaranteeing the
    init function runs exactly once, even when many threads race on it.

    Example:
        _lazy = LazyInit()
        _MODEL_LIKE = None

        def _do_init():
            global _MODEL_LIKE
            from pydantic import BaseModel
            _MODEL_LIKE = (BaseModel,)

        def my_function(x):
            _lazy.ensure(_do_init)
            # _MODEL_LIKE is now initialized

    Attributes:
        initialized: True after ensure() has completed successfully.
    """

    __slots__ = ("_done", "_lock")

    def __init__(self) -> None:
        """Start in the uninitialized state."""
        self._done = False
        self._lock = threading.RLock()

    @property
    def initialized(self) -> bool:
        """Whether ensure() has already completed."""
        return self._done

    def ensure(self, init_func: Callable[[], None]) -> None:
        """Run init_func exactly once, thread-safely.

        The unlocked read is the fast path; the lock is only taken on the
        first call, and the flag is re-checked under it before running.

        Args:
            init_func: Initialization function. Should be idempotent as a
                safety measure (though only called once).
        """
        if self._done:
            return
        with self._lock:
            if not self._done:
                init_func()
                self._done = True
|
kronos/utils/_to_list.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""List conversion utilities with flattening, deduplication, and NA handling."""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from collections.abc import Iterable, Mapping
|
|
9
|
+
from enum import Enum as _Enum
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from ._hash import compute_hash
|
|
13
|
+
from ._lazy_init import LazyInit
|
|
14
|
+
|
|
15
|
+
__all__ = ("to_list",)
|
|
16
|
+
|
|
17
|
+
_lazy = LazyInit()
|
|
18
|
+
_MODEL_LIKE = None
|
|
19
|
+
_MAP_LIKE = None
|
|
20
|
+
_SINGLETONE_TYPES = None
|
|
21
|
+
_SKIP_TYPE = None
|
|
22
|
+
_SKIP_TUPLE_SET = None
|
|
23
|
+
_BYTE_LIKE = (str, bytes, bytearray)
|
|
24
|
+
_TUPLE_SET = (tuple, set, frozenset)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _do_init() -> None:
    """Initialize lazy type constants (Pydantic, kron sentinels).

    Runs exactly once via _lazy.ensure(); keeps pydantic / kronos.types off
    the import path until to_list() is first called.
    """
    from pydantic import BaseModel
    from pydantic_core import PydanticUndefinedType

    from kronos.types import UndefinedType, UnsetType

    global _MODEL_LIKE, _MAP_LIKE, _SINGLETONE_TYPES, _SKIP_TYPE, _SKIP_TUPLE_SET
    _MODEL_LIKE = (BaseModel,)
    # Mappings and models are treated as scalar units by the flattener.
    _MAP_LIKE = (Mapping, *_MODEL_LIKE)
    # Sentinel singletons removed by dropna=True alongside None.
    _SINGLETONE_TYPES = (UndefinedType, UnsetType, PydanticUndefinedType)
    _SKIP_TYPE = (*_BYTE_LIKE, *_MAP_LIKE, _Enum)
    _SKIP_TUPLE_SET = (*_SKIP_TYPE, *_TUPLE_SET)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def to_list(
    input_: Any,
    /,
    *,
    flatten: bool = False,
    dropna: bool = False,
    unique: bool = False,
    use_values: bool = False,
    flatten_tuple_set: bool = False,
) -> list:
    """Convert input to list with optional transformations.

    Type handling:
        - None/Undefined/Unset: returns []
        - list: returned as-is (not copied)
        - Enum class: list of members (or values if use_values=True)
        - str/bytes/bytearray: wrapped as [input_] unless use_values=True
        - Mapping: wrapped as [input_] unless use_values=True (extracts values)
        - BaseModel: wrapped as [input_]
        - Other iterables: converted via list()
        - Non-iterables: wrapped as [input_]

    Args:
        input_: Value to convert.
        flatten: Recursively flatten nested iterables.
        dropna: Remove None and sentinel values (Undefined, Unset).
        unique: Remove duplicates. Requires flatten=True.
        use_values: Extract values from Enum classes and Mappings.
        flatten_tuple_set: When flatten=True, also flatten tuples/sets/frozensets.

    Returns:
        Processed list.

    Raises:
        ValueError: unique=True without flatten=True, or unhashable non-mapping item.

    Edge Cases:
        - Nested lists: preserved unless flatten=True
        - Unhashable items with unique=True: falls back to compute_hash for mappings
        - Empty input: returns []
    """
    # Populate the lazy type tuples (_SINGLETONE_TYPES etc.) on first use.
    _lazy.ensure(_do_init)

    def _process_list(
        lst: list[Any],
        flatten: bool,
        dropna: bool,
        skip_types: tuple[type, ...],
    ) -> list[Any]:
        """Recursively process list with flatten/dropna logic."""
        result: list[Any] = []
        for item in lst:
            if dropna and (item is None or isinstance(item, _SINGLETONE_TYPES)):
                continue
            is_iterable = isinstance(item, Iterable)
            # skip_types are iterables treated as scalars (strings, mappings,
            # models, enums; plus tuples/sets unless flatten_tuple_set).
            should_skip = isinstance(item, skip_types)
            if is_iterable and not should_skip:
                item_list = list(item)
                if flatten:
                    result.extend(_process_list(item_list, flatten, dropna, skip_types))
                else:
                    # Not flattening: recurse only so dropna applies at depth,
                    # keeping the nested structure as a list.
                    result.append(_process_list(item_list, flatten, dropna, skip_types))
            else:
                result.append(item)
        return result

    def _to_list_type(input_: Any, use_values: bool) -> list[Any]:
        """Convert input to initial list based on type."""
        if input_ is None or isinstance(input_, _SINGLETONE_TYPES):
            return []
        if isinstance(input_, list):
            # Returned as-is by design: callers get the same object back.
            return input_
        if isinstance(input_, type) and issubclass(input_, _Enum):
            members = input_.__members__.values()
            return [member.value for member in members] if use_values else list(members)
        if isinstance(input_, _BYTE_LIKE):
            # use_values splits a string/bytes into its elements.
            return list(input_) if use_values else [input_]
        if isinstance(input_, Mapping):
            return list(input_.values()) if use_values and hasattr(input_, "values") else [input_]
        if isinstance(input_, _MODEL_LIKE):
            return [input_]
        if isinstance(input_, Iterable) and not isinstance(input_, _BYTE_LIKE):
            return list(input_)
        return [input_]

    if unique and not flatten:
        raise ValueError("unique=True requires flatten=True")

    initial_list = _to_list_type(input_, use_values=use_values)
    skip_types: tuple[type, ...] = _SKIP_TYPE if flatten_tuple_set else _SKIP_TUPLE_SET
    processed = _process_list(initial_list, flatten=flatten, dropna=dropna, skip_types=skip_types)

    if unique:
        # Fast path: dedupe via a set of the items themselves. If any item is
        # unhashable, restart once using hashes (compute_hash for mappings).
        seen = set()
        out = []
        use_hash_fallback = False
        for i in processed:
            try:
                if not use_hash_fallback and i not in seen:
                    seen.add(i)
                    out.append(i)
            except TypeError:
                if not use_hash_fallback:
                    # Restart with hash-based deduplication
                    use_hash_fallback = True
                    seen = set()
                    out = []
                    for j in processed:
                        try:
                            hash_value = hash(j)
                        except TypeError:
                            if _MAP_LIKE is not None and isinstance(j, _MAP_LIKE):
                                hash_value = compute_hash(j)
                            else:
                                raise ValueError(
                                    "Unhashable type encountered in list unique value processing."
                                )
                        if hash_value not in seen:
                            seen.add(hash_value)
                            out.append(j)
                    # Entire list handled by the fallback pass; stop the
                    # outer loop.
                    break
        return out

    return processed
|
kronos/utils/_to_num.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""Numeric conversion utilities with bounds checking and security limits."""
|
|
5
|
+
|
|
6
|
+
import math
|
|
7
|
+
from decimal import Decimal
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
__all__ = ("to_num", "MAX_NUMBER_STRING_LENGTH")
|
|
11
|
+
|
|
12
|
+
MAX_NUMBER_STRING_LENGTH = 1000
|
|
13
|
+
"""Max string length for numeric conversion (DoS protection)."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def to_num(
|
|
17
|
+
input_: Any,
|
|
18
|
+
/,
|
|
19
|
+
*,
|
|
20
|
+
upper_bound: int | float | None = None,
|
|
21
|
+
lower_bound: int | float | None = None,
|
|
22
|
+
num_type: type[int] | type[float] = float,
|
|
23
|
+
precision: int | None = None,
|
|
24
|
+
allow_inf: bool = False,
|
|
25
|
+
) -> int | float:
|
|
26
|
+
"""Convert input to numeric type with bounds checking and validation.
|
|
27
|
+
|
|
28
|
+
Handles: bool, int, float, Decimal, str. Strings are stripped before parsing.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
input_: Value to convert. Bools treated as int (True=1, False=0).
|
|
32
|
+
upper_bound: Maximum allowed value (inclusive).
|
|
33
|
+
lower_bound: Minimum allowed value (inclusive).
|
|
34
|
+
num_type: Target type, must be `int` or `float`.
|
|
35
|
+
precision: Decimal places for float rounding (ignored for int).
|
|
36
|
+
allow_inf: Permit infinity values. Default False.
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
Converted numeric value of type `num_type`.
|
|
40
|
+
|
|
41
|
+
Raises:
|
|
42
|
+
ValueError: Empty/too-long string, out of bounds, NaN, inf (when disallowed).
|
|
43
|
+
TypeError: Unsupported input type.
|
|
44
|
+
|
|
45
|
+
Edge Cases:
|
|
46
|
+
- Empty string: raises ValueError
|
|
47
|
+
- Whitespace-only string: raises ValueError (stripped to empty)
|
|
48
|
+
- "inf"/"-inf": raises unless allow_inf=True
|
|
49
|
+
- "nan": always raises
|
|
50
|
+
- Decimal: converted via float (may lose precision)
|
|
51
|
+
"""
|
|
52
|
+
if num_type not in (int, float):
|
|
53
|
+
raise ValueError(f"Invalid number type: {num_type}")
|
|
54
|
+
|
|
55
|
+
if isinstance(input_, (bool, int, float, Decimal)):
|
|
56
|
+
value = float(input_)
|
|
57
|
+
elif isinstance(input_, str):
|
|
58
|
+
input_ = input_.strip()
|
|
59
|
+
if not input_:
|
|
60
|
+
raise ValueError("Empty string cannot be converted to number")
|
|
61
|
+
if len(input_) > MAX_NUMBER_STRING_LENGTH:
|
|
62
|
+
msg = f"String length ({len(input_)}) exceeds maximum ({MAX_NUMBER_STRING_LENGTH})"
|
|
63
|
+
raise ValueError(msg)
|
|
64
|
+
try:
|
|
65
|
+
value = float(input_)
|
|
66
|
+
except ValueError as e:
|
|
67
|
+
raise ValueError(f"Cannot convert '{input_}' to number") from e
|
|
68
|
+
else:
|
|
69
|
+
raise TypeError(f"Cannot convert {type(input_).__name__} to number")
|
|
70
|
+
|
|
71
|
+
# NaN bypasses all comparisons; always reject
|
|
72
|
+
if math.isnan(value):
|
|
73
|
+
raise ValueError("NaN is not allowed")
|
|
74
|
+
if math.isinf(value) and not allow_inf:
|
|
75
|
+
raise ValueError("Infinity is not allowed (use allow_inf=True to permit)")
|
|
76
|
+
|
|
77
|
+
if upper_bound is not None and value > upper_bound:
|
|
78
|
+
raise ValueError(f"Value {value} exceeds upper bound {upper_bound}")
|
|
79
|
+
if lower_bound is not None and value < lower_bound:
|
|
80
|
+
raise ValueError(f"Value {value} below lower bound {lower_bound}")
|
|
81
|
+
|
|
82
|
+
if precision is not None and num_type is float:
|
|
83
|
+
value = round(value, precision)
|
|
84
|
+
|
|
85
|
+
return num_type(value)
|