lionherd-core 1.0.0a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionherd_core/__init__.py +84 -0
- lionherd_core/base/__init__.py +30 -0
- lionherd_core/base/_utils.py +295 -0
- lionherd_core/base/broadcaster.py +128 -0
- lionherd_core/base/element.py +300 -0
- lionherd_core/base/event.py +322 -0
- lionherd_core/base/eventbus.py +112 -0
- lionherd_core/base/flow.py +236 -0
- lionherd_core/base/graph.py +616 -0
- lionherd_core/base/node.py +212 -0
- lionherd_core/base/pile.py +811 -0
- lionherd_core/base/progression.py +261 -0
- lionherd_core/errors.py +104 -0
- lionherd_core/libs/__init__.py +2 -0
- lionherd_core/libs/concurrency/__init__.py +60 -0
- lionherd_core/libs/concurrency/_cancel.py +85 -0
- lionherd_core/libs/concurrency/_errors.py +80 -0
- lionherd_core/libs/concurrency/_patterns.py +238 -0
- lionherd_core/libs/concurrency/_primitives.py +253 -0
- lionherd_core/libs/concurrency/_priority_queue.py +135 -0
- lionherd_core/libs/concurrency/_resource_tracker.py +66 -0
- lionherd_core/libs/concurrency/_task.py +58 -0
- lionherd_core/libs/concurrency/_utils.py +61 -0
- lionherd_core/libs/schema_handlers/__init__.py +35 -0
- lionherd_core/libs/schema_handlers/_function_call_parser.py +122 -0
- lionherd_core/libs/schema_handlers/_minimal_yaml.py +88 -0
- lionherd_core/libs/schema_handlers/_schema_to_model.py +251 -0
- lionherd_core/libs/schema_handlers/_typescript.py +153 -0
- lionherd_core/libs/string_handlers/__init__.py +15 -0
- lionherd_core/libs/string_handlers/_extract_json.py +65 -0
- lionherd_core/libs/string_handlers/_fuzzy_json.py +103 -0
- lionherd_core/libs/string_handlers/_string_similarity.py +347 -0
- lionherd_core/libs/string_handlers/_to_num.py +63 -0
- lionherd_core/ln/__init__.py +45 -0
- lionherd_core/ln/_async_call.py +314 -0
- lionherd_core/ln/_fuzzy_match.py +166 -0
- lionherd_core/ln/_fuzzy_validate.py +151 -0
- lionherd_core/ln/_hash.py +141 -0
- lionherd_core/ln/_json_dump.py +347 -0
- lionherd_core/ln/_list_call.py +110 -0
- lionherd_core/ln/_to_dict.py +373 -0
- lionherd_core/ln/_to_list.py +190 -0
- lionherd_core/ln/_utils.py +156 -0
- lionherd_core/lndl/__init__.py +62 -0
- lionherd_core/lndl/errors.py +30 -0
- lionherd_core/lndl/fuzzy.py +321 -0
- lionherd_core/lndl/parser.py +427 -0
- lionherd_core/lndl/prompt.py +137 -0
- lionherd_core/lndl/resolver.py +323 -0
- lionherd_core/lndl/types.py +287 -0
- lionherd_core/protocols.py +181 -0
- lionherd_core/py.typed +0 -0
- lionherd_core/types/__init__.py +46 -0
- lionherd_core/types/_sentinel.py +131 -0
- lionherd_core/types/base.py +341 -0
- lionherd_core/types/operable.py +133 -0
- lionherd_core/types/spec.py +313 -0
- lionherd_core/types/spec_adapters/__init__.py +10 -0
- lionherd_core/types/spec_adapters/_protocol.py +125 -0
- lionherd_core/types/spec_adapters/pydantic_field.py +177 -0
- lionherd_core-1.0.0a3.dist-info/METADATA +502 -0
- lionherd_core-1.0.0a3.dist-info/RECORD +64 -0
- lionherd_core-1.0.0a3.dist-info/WHEEL +4 -0
- lionherd_core-1.0.0a3.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
6
|
+
|
|
7
|
+
from lionherd_core.errors import ValidationError
|
|
8
|
+
|
|
9
|
+
from ..libs.string_handlers._extract_json import extract_json
|
|
10
|
+
from ..libs.string_handlers._string_similarity import SIMILARITY_TYPE
|
|
11
|
+
from ..types import KeysLike
|
|
12
|
+
from ._fuzzy_match import FuzzyMatchKeysParams, fuzzy_match_keys
|
|
13
|
+
from ._to_dict import to_dict
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from pydantic import BaseModel
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Both module-level validators are public API; previously only
# fuzzy_validate_pydantic was exported, hiding fuzzy_validate_mapping
# from star-imports and API docs.
__all__ = ("fuzzy_validate_mapping", "fuzzy_validate_pydantic")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def fuzzy_validate_pydantic(
    text,
    /,
    model_type: "type[BaseModel]",
    fuzzy_parse: bool = True,
    fuzzy_match: bool = False,
    fuzzy_match_params: FuzzyMatchKeysParams | dict | None = None,
) -> "BaseModel":
    """Validate and parse text/dict into Pydantic model with fuzzy parsing.

    Args:
        text: Input data (BaseModel instance, dict, or JSON string)
        model_type: Target Pydantic model class
        fuzzy_parse: Enable fuzzy JSON extraction from text
        fuzzy_match: Enable fuzzy key matching for field names
        fuzzy_match_params: Parameters for fuzzy matching (dict or FuzzyMatchKeysParams)

    Returns:
        Validated Pydantic model instance

    Raises:
        ValidationError: If JSON extraction or model validation fails
        TypeError: If fuzzy_match_params is invalid type
    """
    # Handle already-valid model instances
    if isinstance(text, model_type):
        return text

    # Handle dict inputs directly (skip JSON extraction)
    if isinstance(text, dict):
        model_data = text
    else:
        # Handle string inputs (JSON strings, markdown, etc.)
        try:
            model_data = extract_json(text, fuzzy_parse=fuzzy_parse)
        except Exception as e:
            raise ValidationError(f"Failed to extract valid JSON from model response: {e}") from e

    d = model_data
    if fuzzy_match:
        if fuzzy_match_params is None:
            # Default policy: drop keys that don't match any model field.
            model_data = fuzzy_match_keys(d, model_type.model_fields, handle_unmatched="remove")
        elif isinstance(fuzzy_match_params, dict):
            model_data = fuzzy_match_keys(d, model_type.model_fields, **fuzzy_match_params)
        elif isinstance(fuzzy_match_params, FuzzyMatchKeysParams):
            model_data = fuzzy_match_params(d, model_type.model_fields)
        else:
            # Message now names the actual parameter (was "fuzzy_keys_params").
            raise TypeError("fuzzy_match_params must be a dict or FuzzyMatchKeysParams instance")

    try:
        return model_type.model_validate(model_data)
    except Exception as e:
        raise ValidationError(f"Validation failed: {e}") from e
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def fuzzy_validate_mapping(
    d: Any,
    keys: KeysLike,
    /,
    *,
    similarity_algo: SIMILARITY_TYPE | Callable[[str, str], float] = "jaro_winkler",
    similarity_threshold: float = 0.85,
    fuzzy_match: bool = True,
    handle_unmatched: Literal["ignore", "raise", "remove", "fill", "force"] = "ignore",
    fill_value: Any = None,
    fill_mapping: dict[str, Any] | None = None,
    strict: bool = False,
    suppress_conversion_errors: bool = False,
) -> dict[str, Any]:
    """Validate any input into dict with expected keys and fuzzy matching.

    Converts input (dict, JSON string, XML, object) to dict and validates keys.

    Args:
        d: Input to convert and validate
        keys: Expected keys (list or dict-like)
        similarity_algo: String similarity algorithm
        similarity_threshold: Minimum similarity score (0.0-1.0)
        fuzzy_match: Enable fuzzy key matching
        handle_unmatched: How to handle unmatched keys
        fill_value: Default value for missing keys
        fill_mapping: Custom values for specific keys
        strict: Raise if expected keys are missing
        suppress_conversion_errors: Return empty dict on conversion failure

    Returns:
        Validated dictionary with corrected keys

    Raises:
        TypeError: If d is None
        ValueError: If conversion fails and suppress_conversion_errors is False
    """
    if d is None:
        raise TypeError("Input cannot be None")

    # Try converting to dictionary
    try:
        if isinstance(d, str):
            try:
                json_result = extract_json(d, fuzzy_parse=True, return_one_if_single=True)
                dict_input = json_result[0] if isinstance(json_result, list) else json_result
            except Exception:
                # Fall back to generic conversion for non-JSON strings.
                dict_input = to_dict(d, fuzzy_parse=True, suppress=True)
        else:
            dict_input = to_dict(d, prioritize_model_dump=True, fuzzy_parse=True, suppress=True)
    except Exception as e:
        if suppress_conversion_errors:
            dict_input = {}
        else:
            # Chain the original failure for debuggability.
            raise ValueError(f"Failed to convert input to dictionary: {e}") from e

    # Type check happens OUTSIDE the try above; previously the inner raise
    # was caught by the enclosing `except Exception` and re-wrapped, which
    # produced a doubled "Failed to convert..." message.
    if not isinstance(dict_input, dict):
        if suppress_conversion_errors:
            dict_input = {}
        else:
            raise ValueError(f"Failed to convert input to dictionary: {type(dict_input)}")

    # Validate the dictionary
    return fuzzy_match_keys(
        dict_input,
        keys,
        similarity_algo=similarity_algo,
        similarity_threshold=similarity_threshold,
        fuzzy_match=fuzzy_match,
        handle_unmatched=handle_unmatched,
        fill_value=fill_value,
        fill_mapping=fill_mapping,
        strict=strict,
    )
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import contextlib
|
|
7
|
+
import copy
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
__all__ = ("hash_dict",)
|
|
11
|
+
|
|
12
|
+
# Global initialization state
|
|
13
|
+
_INITIALIZED = False
|
|
14
|
+
PydanticBaseModel = None
|
|
15
|
+
|
|
16
|
+
# --- Canonical Representation Generator ---
|
|
17
|
+
_PRIMITIVE_TYPES = (str, int, float, bool, type(None))
|
|
18
|
+
_TYPE_MARKER_DICT = 0
|
|
19
|
+
_TYPE_MARKER_LIST = 1
|
|
20
|
+
_TYPE_MARKER_TUPLE = 2
|
|
21
|
+
_TYPE_MARKER_SET = 3
|
|
22
|
+
_TYPE_MARKER_FROZENSET = 4
|
|
23
|
+
_TYPE_MARKER_PYDANTIC = 5 # Distinguishes dumped Pydantic models
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _generate_hashable_representation(item: Any) -> Any:
|
|
27
|
+
"""Convert object to stable hashable representation recursively.
|
|
28
|
+
|
|
29
|
+
Ensures order-independent hashing for dicts/sets and consistent handling of collections.
|
|
30
|
+
"""
|
|
31
|
+
if isinstance(item, _PRIMITIVE_TYPES):
|
|
32
|
+
return item
|
|
33
|
+
|
|
34
|
+
if PydanticBaseModel and isinstance(item, PydanticBaseModel):
|
|
35
|
+
# Process the Pydantic model by first dumping it to a dict, then processing that dict.
|
|
36
|
+
# The type marker distinguishes this from a regular dictionary.
|
|
37
|
+
return (
|
|
38
|
+
_TYPE_MARKER_PYDANTIC,
|
|
39
|
+
_generate_hashable_representation(item.model_dump()),
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
if isinstance(item, dict):
|
|
43
|
+
# Sort dictionary items by key (stringified) for order-insensitivity.
|
|
44
|
+
return (
|
|
45
|
+
_TYPE_MARKER_DICT,
|
|
46
|
+
tuple(
|
|
47
|
+
(str(k), _generate_hashable_representation(v))
|
|
48
|
+
for k, v in sorted(item.items(), key=lambda x: str(x[0]))
|
|
49
|
+
),
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
if isinstance(item, list):
|
|
53
|
+
return (
|
|
54
|
+
_TYPE_MARKER_LIST,
|
|
55
|
+
tuple(_generate_hashable_representation(elem) for elem in item),
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
if isinstance(item, tuple):
|
|
59
|
+
return (
|
|
60
|
+
_TYPE_MARKER_TUPLE,
|
|
61
|
+
tuple(_generate_hashable_representation(elem) for elem in item),
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# frozenset must be checked before set
|
|
65
|
+
if isinstance(item, frozenset):
|
|
66
|
+
try: # Attempt direct sort for comparable elements
|
|
67
|
+
sorted_elements = sorted(list(item))
|
|
68
|
+
except TypeError: # Fallback for unorderable mixed types
|
|
69
|
+
|
|
70
|
+
def sort_key(x):
|
|
71
|
+
# Deterministic ordering across mixed, unorderable types
|
|
72
|
+
# Sort strictly by textual type then textual value.
|
|
73
|
+
# This also naturally places bool before int because
|
|
74
|
+
# "<class 'bool'>" < "<class 'int'>" lexicographically.
|
|
75
|
+
return (str(type(x)), str(x))
|
|
76
|
+
|
|
77
|
+
sorted_elements = sorted(list(item), key=sort_key)
|
|
78
|
+
return (
|
|
79
|
+
_TYPE_MARKER_FROZENSET,
|
|
80
|
+
tuple(_generate_hashable_representation(elem) for elem in sorted_elements),
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
if isinstance(item, set):
|
|
84
|
+
try:
|
|
85
|
+
sorted_elements = sorted(list(item))
|
|
86
|
+
except TypeError:
|
|
87
|
+
# For mixed types, use a deterministic, portable sort key
|
|
88
|
+
def sort_key(x):
|
|
89
|
+
# Sort by textual type then textual value for stability.
|
|
90
|
+
return (str(type(x)), str(x))
|
|
91
|
+
|
|
92
|
+
sorted_elements = sorted(list(item), key=sort_key)
|
|
93
|
+
return (
|
|
94
|
+
_TYPE_MARKER_SET,
|
|
95
|
+
tuple(_generate_hashable_representation(elem) for elem in sorted_elements),
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Fallback for other types (e.g., custom objects not derived from the above)
|
|
99
|
+
with contextlib.suppress(Exception):
|
|
100
|
+
return str(item)
|
|
101
|
+
with contextlib.suppress(Exception):
|
|
102
|
+
return repr(item)
|
|
103
|
+
|
|
104
|
+
# If both str() and repr() fail, return a stable fallback based on type and id
|
|
105
|
+
return f"<unhashable:{type(item).__name__}:{id(item)}>"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def hash_dict(data: Any, strict: bool = False) -> int:
    """Generate stable hash for any data structure including dicts, lists, and Pydantic models.

    Args:
        data: Data to hash (dict, list, BaseModel, or any object)
        strict: If True, deepcopy data before hashing to prevent mutation side effects

    Returns:
        Integer hash value (stable across equivalent structures)

    Raises:
        TypeError: If generated representation is not hashable
    """
    global _INITIALIZED, PydanticBaseModel
    if not _INITIALIZED:
        # One-time lazy import keeps pydantic off the module import path
        # until hashing is actually used.
        from pydantic import BaseModel

        PydanticBaseModel = BaseModel
        _INITIALIZED = True

    # Optionally snapshot the input so building the representation can
    # never be affected by (or cause) concurrent mutation.
    target = copy.deepcopy(data) if strict else data

    representation = _generate_hashable_representation(target)

    try:
        return hash(representation)
    except TypeError as e:
        raise TypeError(
            f"The generated representation for the input data was not hashable. "
            f"Input type: {type(data).__name__}, Representation type: {type(representation).__name__}. "
            f"Original error: {e}"
        )
|
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
# Copyright (c) 2025, HaiyangLi <quantocean.li at gmail dot com>
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import contextlib
|
|
7
|
+
import datetime as dt
|
|
8
|
+
import decimal
|
|
9
|
+
import re
|
|
10
|
+
from collections.abc import Callable, Iterable, Mapping
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from functools import lru_cache
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from textwrap import shorten
|
|
15
|
+
from typing import Any
|
|
16
|
+
from uuid import UUID
|
|
17
|
+
|
|
18
|
+
import orjson
|
|
19
|
+
|
|
20
|
+
# json_dict is defined at the bottom of this module but was missing from
# the public export list; include it alongside the other dump helpers.
__all__ = [
    "get_orjson_default",
    "json_dict",
    "json_dumpb",
    "json_dumps",
    "json_lines_iter",
    "make_options",
]
|
|
27
|
+
|
|
28
|
+
# Types orjson already serializes natively at C/Rust speed.
|
|
29
|
+
# (We only route them through default() when passthrough is requested.)
|
|
30
|
+
_NATIVE = (dt.datetime, dt.date, dt.time, UUID)
|
|
31
|
+
|
|
32
|
+
# --------- helpers ------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
_ADDR_PAT = re.compile(r" at 0x[0-9A-Fa-f]+")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _clip(s: str, limit: int = 2048) -> str:
|
|
38
|
+
return shorten(s, width=limit, placeholder=f"...(+{len(s) - limit} chars)") # type: ignore[arg-type]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _normalize_for_sorting(x: Any) -> str:
    """Stringify *x*, masking memory addresses so sort keys are process-stable."""
    return _ADDR_PAT.sub(" at 0x?", str(x))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _stable_sorted_iterable(o: Iterable[Any]) -> list[Any]:
    """Order set elements deterministically, even across mixed types.

    The sort key pairs the class name with an address-normalized string, so
    unlike types are never compared directly and default reprs don't vary
    between processes.
    """

    def sort_key(elem: Any) -> tuple[str, str]:
        return (elem.__class__.__name__, _normalize_for_sorting(elem))

    return sorted(o, key=sort_key)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _safe_exception_payload(ex: Exception) -> dict[str, str]:
|
|
57
|
+
return {"type": ex.__class__.__name__, "message": str(ex)}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _default_serializers(
    deterministic_sets: bool,
    decimal_as_float: bool,
    enum_as_name: bool,
    passthrough_datetime: bool,
) -> dict[type, Callable[[Any], Any]]:
    """Build the base type -> serializer table honoring the given toggles."""
    iterable_ser = _stable_sorted_iterable if deterministic_sets else list
    table: dict[type, Callable[[Any], Any]] = {
        Path: str,
        decimal.Decimal: float if decimal_as_float else str,
        set: iterable_ser,
        frozenset: iterable_ser,
    }
    if enum_as_name:
        table[Enum] = lambda e: e.name
    if passthrough_datetime:
        # Only needed if you also set OPT_PASSTHROUGH_DATETIME via options.
        table[dt.datetime] = lambda o: o.isoformat()
    return table
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# --------- default() factory --------------------------------------------------
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def get_orjson_default(
    *,
    order: list[type] | None = None,
    additional: Mapping[type, Callable[[Any], Any]] | None = None,
    extend_default: bool = True,
    deterministic_sets: bool = False,
    decimal_as_float: bool = False,
    enum_as_name: bool = False,
    passthrough_datetime: bool = False,
    safe_fallback: bool = False,
    fallback_clip: int = 2048,
) -> Callable[[Any], Any]:
    """
    Build a fast, extensible `default=` callable for orjson.dumps.

    - order: subclass-check precedence for serializer dispatch; merged after
      the built-in order unless extend_default is False.
    - additional: extra {type: serializer} entries overriding the built-ins.
    - deterministic_sets: sort set/frozenset deterministically (slower).
    - decimal_as_float: serialize Decimal as float (faster/smaller; precision loss).
    - enum_as_name: serialize Enum as .name (else orjson uses .value by default).
    - passthrough_datetime: if True, also pass OPT_PASSTHROUGH_DATETIME in options.
    - safe_fallback: if True, unknown objects never raise (for logs);
      Exceptions become a tiny dict; all else becomes clipped repr(str).

    'order' and 'additional' preserve your override semantics.
    """
    # Base serializer table per the boolean toggles; 'additional' entries
    # override it key-by-key.
    ser = _default_serializers(
        deterministic_sets=deterministic_sets,
        decimal_as_float=decimal_as_float,
        enum_as_name=enum_as_name,
        passthrough_datetime=passthrough_datetime,
    )
    if additional:
        ser.update(additional)

    # Dispatch order for issubclass() checks in the closure below.
    base_order: list[type] = [Path, decimal.Decimal, set, frozenset]
    if enum_as_name:
        base_order.insert(0, Enum)
    if passthrough_datetime:
        base_order.insert(0, dt.datetime)

    if order:
        # extend_default: caller types appended after built-ins (dedup'd);
        # otherwise the caller's order fully replaces the built-in one.
        order_ = (
            (base_order + [t for t in order if t not in base_order])
            if extend_default
            else list(order)
        )
    else:
        order_ = base_order.copy()

    if not passthrough_datetime:
        # Avoid checks for types already on the orjson native fast path.
        order_ = [t for t in order_ if t not in _NATIVE]

    order_tuple = tuple(order_)
    # Per-concrete-type memo: once a type resolves to a serializer, later
    # objects of that exact type skip the issubclass scan entirely.
    cache: dict[type, Callable[[Any], Any]] = {}

    def default(obj: Any) -> Any:
        typ = obj.__class__
        func = cache.get(typ)
        if func is None:
            # First time we see this concrete type: scan the precedence list.
            for T in order_tuple:
                if issubclass(typ, T):
                    f = ser.get(T)
                    # f may be None if an 'order' entry has no serializer —
                    # keep scanning rather than caching a dead end.
                    if f:
                        cache[typ] = f
                        func = f
                        break
            else:
                # for/else: no serializer matched.
                # Duck-typed support for common data holders
                md = getattr(obj, "model_dump", None)
                if callable(md):
                    with contextlib.suppress(Exception):
                        return md()

                dd = getattr(obj, "dict", None)
                if callable(dd):
                    with contextlib.suppress(Exception):
                        return dd()
                if safe_fallback:
                    # Logging mode: never raise; degrade to a tiny payload.
                    if isinstance(obj, Exception):
                        return _safe_exception_payload(obj)
                    return _clip(repr(obj), fallback_clip)
                raise TypeError(f"Type is not JSON serializable: {typ.__name__}")
        return func(obj)

    return default
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@lru_cache(maxsize=128)
def _cached_default(
    deterministic_sets: bool,
    decimal_as_float: bool,
    enum_as_name: bool,
    passthrough_datetime: bool,
    safe_fallback: bool,
    fallback_clip: int,
):
    """Memoized get_orjson_default() for the common no-override case.

    All arguments are hashable scalars, so identical configurations share a
    single default() closure — and therefore its per-type dispatch cache —
    across calls to json_dumpb/json_lines_iter.
    """
    return get_orjson_default(
        deterministic_sets=deterministic_sets,
        decimal_as_float=decimal_as_float,
        enum_as_name=enum_as_name,
        passthrough_datetime=passthrough_datetime,
        safe_fallback=safe_fallback,
        fallback_clip=fallback_clip,
    )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# --------- defaults & options -------------------------------------------------
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def make_options(
    *,
    pretty: bool = False,
    sort_keys: bool = False,
    naive_utc: bool = False,
    utc_z: bool = False,
    append_newline: bool = False,
    passthrough_datetime: bool = False,
    allow_non_str_keys: bool = False,
) -> int:
    """Compose orjson 'option' bit flags from keyword toggles."""
    flag_bits = (
        (append_newline, orjson.OPT_APPEND_NEWLINE),
        (pretty, orjson.OPT_INDENT_2),
        (sort_keys, orjson.OPT_SORT_KEYS),
        (naive_utc, orjson.OPT_NAIVE_UTC),
        (utc_z, orjson.OPT_UTC_Z),
        (passthrough_datetime, orjson.OPT_PASSTHROUGH_DATETIME),
        (allow_non_str_keys, orjson.OPT_NON_STR_KEYS),
    )
    opt = 0
    for enabled, bit in flag_bits:
        if enabled:
            opt |= bit
    return opt
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# --------- dump helpers -------------------------------------------------------
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def json_dumpb(
    obj: Any,
    *,
    pretty: bool = False,
    sort_keys: bool = False,
    naive_utc: bool = False,
    utc_z: bool = False,
    append_newline: bool = False,
    allow_non_str_keys: bool = False,
    deterministic_sets: bool = False,
    decimal_as_float: bool = False,
    enum_as_name: bool = False,
    passthrough_datetime: bool = False,
    safe_fallback: bool = False,
    fallback_clip: int = 2048,
    default: Callable[[Any], Any] | None = None,
    options: int | None = None,
) -> bytes:
    """
    Serialize to **bytes** (fast path). Prefer this in hot code.

    Notes:
        - If you set passthrough_datetime=True, you likely also want it in options.
        - safe_fallback=True is recommended for LOGGING ONLY.
    """
    # An explicit 'default' or 'options' wins over the keyword toggles.
    if default is None:
        default = _cached_default(
            deterministic_sets=deterministic_sets,
            decimal_as_float=decimal_as_float,
            enum_as_name=enum_as_name,
            passthrough_datetime=passthrough_datetime,
            safe_fallback=safe_fallback,
            fallback_clip=fallback_clip,
        )
    if options is not None:
        opt = options
    else:
        opt = make_options(
            pretty=pretty,
            sort_keys=sort_keys,
            naive_utc=naive_utc,
            utc_z=utc_z,
            append_newline=append_newline,
            passthrough_datetime=passthrough_datetime,
            allow_non_str_keys=allow_non_str_keys,
        )
    return orjson.dumps(obj, default=default, option=opt)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def json_dumps(
    obj: Any,
    /,
    *,
    decode: bool = True,
    **kwargs: Any,
) -> str | bytes:
    """
    Serialize to str by default (decode=True), or bytes if decode=False.

    All other keyword arguments are forwarded to json_dumpb().
    """
    raw = json_dumpb(obj, **kwargs)
    if decode:
        return raw.decode("utf-8")
    return raw
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
# --------- streaming for very large outputs ----------------------------------
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def json_lines_iter(
    it: Iterable[Any],
    *,
    # default() configuration for each line
    deterministic_sets: bool = False,
    decimal_as_float: bool = False,
    enum_as_name: bool = False,
    passthrough_datetime: bool = False,
    safe_fallback: bool = False,
    fallback_clip: int = 2048,
    # options
    naive_utc: bool = False,
    utc_z: bool = False,
    allow_non_str_keys: bool = False,
    # advanced
    default: Callable[[Any], Any] | None = None,
    options: int | None = None,
) -> Iterable[bytes]:
    """
    Stream an iterable as **NDJSON** (one JSON object per line) in **bytes**.

    Always ensures a trailing newline per line (OPT_APPEND_NEWLINE), even
    when caller-supplied 'options' omit it.
    """
    line_default = default
    if line_default is None:
        line_default = _cached_default(
            deterministic_sets=deterministic_sets,
            decimal_as_float=decimal_as_float,
            enum_as_name=enum_as_name,
            passthrough_datetime=passthrough_datetime,
            safe_fallback=safe_fallback,
            fallback_clip=fallback_clip,
        )

    if options is not None:
        # Force the newline bit regardless of what the caller passed.
        opt = options | orjson.OPT_APPEND_NEWLINE
    else:
        opt = make_options(
            pretty=False,
            sort_keys=False,
            naive_utc=naive_utc,
            utc_z=utc_z,
            append_newline=True,  # enforce newline for NDJSON
            passthrough_datetime=passthrough_datetime,
            allow_non_str_keys=allow_non_str_keys,
        )

    for record in it:
        yield orjson.dumps(record, default=line_default, option=opt)
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def json_dict(
    obj: Any,
    /,
    **kwargs: Any,
) -> Any:
    """Round-trip *obj* through orjson into plain Python containers.

    Serializes with json_dumpb (all kwargs are forwarded to it), then parses
    the bytes back, coercing arbitrary objects — Pydantic models, Paths,
    sets, Decimals, etc. — into JSON-compatible dict/list/str/number values.
    Returns whatever orjson.loads yields (typically a dict).
    """
    return orjson.loads(json_dumpb(obj, **kwargs))
|