krons-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kronos/__init__.py +0 -0
- kronos/core/__init__.py +145 -0
- kronos/core/broadcaster.py +116 -0
- kronos/core/element.py +225 -0
- kronos/core/event.py +316 -0
- kronos/core/eventbus.py +116 -0
- kronos/core/flow.py +356 -0
- kronos/core/graph.py +442 -0
- kronos/core/node.py +982 -0
- kronos/core/pile.py +575 -0
- kronos/core/processor.py +494 -0
- kronos/core/progression.py +296 -0
- kronos/enforcement/__init__.py +57 -0
- kronos/enforcement/common/__init__.py +34 -0
- kronos/enforcement/common/boolean.py +85 -0
- kronos/enforcement/common/choice.py +97 -0
- kronos/enforcement/common/mapping.py +118 -0
- kronos/enforcement/common/model.py +102 -0
- kronos/enforcement/common/number.py +98 -0
- kronos/enforcement/common/string.py +140 -0
- kronos/enforcement/context.py +129 -0
- kronos/enforcement/policy.py +80 -0
- kronos/enforcement/registry.py +153 -0
- kronos/enforcement/rule.py +312 -0
- kronos/enforcement/service.py +370 -0
- kronos/enforcement/validator.py +198 -0
- kronos/errors.py +146 -0
- kronos/operations/__init__.py +32 -0
- kronos/operations/builder.py +228 -0
- kronos/operations/flow.py +398 -0
- kronos/operations/node.py +101 -0
- kronos/operations/registry.py +92 -0
- kronos/protocols.py +414 -0
- kronos/py.typed +0 -0
- kronos/services/__init__.py +81 -0
- kronos/services/backend.py +286 -0
- kronos/services/endpoint.py +608 -0
- kronos/services/hook.py +471 -0
- kronos/services/imodel.py +465 -0
- kronos/services/registry.py +115 -0
- kronos/services/utilities/__init__.py +36 -0
- kronos/services/utilities/header_factory.py +87 -0
- kronos/services/utilities/rate_limited_executor.py +271 -0
- kronos/services/utilities/rate_limiter.py +180 -0
- kronos/services/utilities/resilience.py +414 -0
- kronos/session/__init__.py +41 -0
- kronos/session/exchange.py +258 -0
- kronos/session/message.py +60 -0
- kronos/session/session.py +411 -0
- kronos/specs/__init__.py +25 -0
- kronos/specs/adapters/__init__.py +0 -0
- kronos/specs/adapters/_utils.py +45 -0
- kronos/specs/adapters/dataclass_field.py +246 -0
- kronos/specs/adapters/factory.py +56 -0
- kronos/specs/adapters/pydantic_adapter.py +309 -0
- kronos/specs/adapters/sql_ddl.py +946 -0
- kronos/specs/catalog/__init__.py +36 -0
- kronos/specs/catalog/_audit.py +39 -0
- kronos/specs/catalog/_common.py +43 -0
- kronos/specs/catalog/_content.py +59 -0
- kronos/specs/catalog/_enforcement.py +70 -0
- kronos/specs/factory.py +120 -0
- kronos/specs/operable.py +314 -0
- kronos/specs/phrase.py +405 -0
- kronos/specs/protocol.py +140 -0
- kronos/specs/spec.py +506 -0
- kronos/types/__init__.py +60 -0
- kronos/types/_sentinel.py +311 -0
- kronos/types/base.py +369 -0
- kronos/types/db_types.py +260 -0
- kronos/types/identity.py +66 -0
- kronos/utils/__init__.py +40 -0
- kronos/utils/_hash.py +234 -0
- kronos/utils/_json_dump.py +392 -0
- kronos/utils/_lazy_init.py +63 -0
- kronos/utils/_to_list.py +165 -0
- kronos/utils/_to_num.py +85 -0
- kronos/utils/_utils.py +375 -0
- kronos/utils/concurrency/__init__.py +205 -0
- kronos/utils/concurrency/_async_call.py +333 -0
- kronos/utils/concurrency/_cancel.py +122 -0
- kronos/utils/concurrency/_errors.py +96 -0
- kronos/utils/concurrency/_patterns.py +363 -0
- kronos/utils/concurrency/_primitives.py +328 -0
- kronos/utils/concurrency/_priority_queue.py +135 -0
- kronos/utils/concurrency/_resource_tracker.py +110 -0
- kronos/utils/concurrency/_run_async.py +67 -0
- kronos/utils/concurrency/_task.py +95 -0
- kronos/utils/concurrency/_utils.py +79 -0
- kronos/utils/fuzzy/__init__.py +14 -0
- kronos/utils/fuzzy/_extract_json.py +90 -0
- kronos/utils/fuzzy/_fuzzy_json.py +288 -0
- kronos/utils/fuzzy/_fuzzy_match.py +149 -0
- kronos/utils/fuzzy/_string_similarity.py +187 -0
- kronos/utils/fuzzy/_to_dict.py +396 -0
- kronos/utils/sql/__init__.py +13 -0
- kronos/utils/sql/_sql_validation.py +142 -0
- krons-0.1.0.dist-info/METADATA +70 -0
- krons-0.1.0.dist-info/RECORD +101 -0
- krons-0.1.0.dist-info/WHEEL +4 -0
- krons-0.1.0.dist-info/licenses/LICENSE +201 -0
kronos/types/db_types.py
ADDED
@@ -0,0 +1,260 @@
# Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
# SPDX-License-Identifier: Apache-2.0

"""Type annotations for database fields.

Provides semantic typing for foreign keys and vector embeddings:
    FK[Model] - Foreign key references to entity types
    Vector[dim] - pgvector embeddings with dimension

Extraction:
    extract_kron_db_meta(source, metas="BOTH")
        Unified extraction from FieldInfo, annotations, or Spec objects.
"""

from __future__ import annotations

import types
from typing import Annotated, Any, Literal, Union, get_args, get_origin
from uuid import UUID

from kronos.types._sentinel import Unset, UnsetType, not_sentinel


def _is_field_info(obj: Any) -> bool:
    """Runtime check for Pydantic FieldInfo without hard import."""
    return type(obj).__name__ == "FieldInfo" and hasattr(obj, "metadata")


__all__ = [
    "FK",
    "FKMeta",
    "Vector",
    "VectorMeta",
    "extract_kron_db_meta",
]


# =============================================================================
# Foreign Key
# =============================================================================


class FKMeta:
    """Metadata for foreign key fields.

    Carries:
    - model: Referenced Entity/Node class (or string for forward refs)
    - column: Referenced column (default "id")
    - on_delete/on_update: Referential actions
    - deferrable/initially_deferred: Constraint deferral

    Example:
        tenant_id: FK[Tenant]  # FKMeta(Tenant, "id", "CASCADE", "CASCADE")
    """

    __slots__ = (
        "model",
        "column",
        "on_delete",
        "on_update",
        "deferrable",
        "initially_deferred",
    )

    def __init__(
        self,
        model: type | str,
        column: str = "id",
        on_delete: str = "CASCADE",
        on_update: str = "CASCADE",
        deferrable: bool = False,
        initially_deferred: bool = False,
    ):
        self.model = model
        self.column = column
        self.on_delete = on_delete
        self.on_update = on_update
        self.deferrable = deferrable
        self.initially_deferred = initially_deferred

    @property
    def table_name(self) -> str:
        """Get referenced table name from model's config or convention."""
        if isinstance(self.model, str):
            return self.model.lower() + "s"
        if hasattr(self.model, "node_config"):
            config = self.model.node_config
            if config and hasattr(config, "table_name"):
                return config.table_name
        if hasattr(self.model, "_table_name"):
            return self.model._table_name
        return self.model.__name__.lower() + "s"

    @property
    def is_resolved(self) -> bool:
        """Check if FK reference has been resolved to a class."""
        return not isinstance(self.model, str)

    def resolve(self, model_cls: type) -> None:
        """Resolve string reference to actual class."""
        self.model = model_cls

    def __repr__(self) -> str:
        name = self.model if isinstance(self.model, str) else self.model.__name__
        return f"FK[{name}]"


class _FK:
    """Foreign key type annotation: FK[Model] -> Annotated[UUID, FKMeta(Model)]."""

    def __class_getitem__(cls, model: type | str) -> Any:
        return Annotated[UUID, FKMeta(model)]


FK = _FK


# =============================================================================
# Vector (pgvector)
# =============================================================================


class VectorMeta:
    """Metadata for vector embedding fields.

    Carries dimension for pgvector VECTOR(dim) type.

    Example:
        embedding: Vector[1536]  # VectorMeta(1536)
    """

    __slots__ = ("dim",)

    def __init__(self, dim: int):
        if dim <= 0:
            raise ValueError(f"Vector dimension must be positive, got {dim}")
        self.dim = dim

    def __repr__(self) -> str:
        return f"Vector[{self.dim}]"


class _Vector:
    """Vector type annotation: Vector[dim] -> Annotated[list[float], VectorMeta(dim)]."""

    def __class_getitem__(cls, dim: int) -> Any:
        return Annotated[list[float], VectorMeta(dim)]


Vector = _Vector


# =============================================================================
# Extraction
# =============================================================================

# Return type aliases for extract_kron_db_meta
_MetaResult = FKMeta | VectorMeta | UnsetType
_BothResult = tuple[FKMeta | UnsetType, VectorMeta | UnsetType]


def _find_in_annotation(annotation: Any, meta_type: type) -> Any | None:
    """Find metadata of given type in an annotation (Annotated or Union)."""
    # Direct Annotated[T, Meta(...)]
    if get_origin(annotation) is Annotated:
        for arg in get_args(annotation):
            if isinstance(arg, meta_type):
                return arg

    # Union (T | None) with Annotated members
    origin = get_origin(annotation)
    if origin is Union or isinstance(annotation, types.UnionType):
        for member in get_args(annotation):
            if get_origin(member) is Annotated:
                for arg in get_args(member):
                    if isinstance(arg, meta_type):
                        return arg

    return None


def _find_in_field_info(field_info: Any, meta_type: type) -> Any | None:
    """Find metadata of given type in a Pydantic FieldInfo."""
    # Pydantic v2: metadata list
    if hasattr(field_info, "metadata"):
        for item in field_info.metadata:
            if isinstance(item, meta_type):
                return item
            # Pydantic may store Annotated types in metadata
            if get_origin(item) is Annotated:
                found = _find_in_annotation(item, meta_type)
                if found is not None:
                    return found

    # Fallback: check annotation
    annotation = getattr(field_info, "annotation", None)
    if annotation is not None:
        return _find_in_annotation(annotation, meta_type)

    return None


def extract_kron_db_meta(
    from_: Any,
    metas: Literal["FK", "Vector", "BOTH"] = "BOTH",
) -> _MetaResult | _BothResult:
    """Extract FK and/or Vector metadata from a source.

    Unified extraction dispatching on source type:
    - FieldInfo: searches Pydantic metadata and annotation
    - type/annotation: searches Annotated/Union structure
    - Spec: reads spec metadata directly

    Args:
        from_: FieldInfo, type annotation, or Spec instance
        metas: What to extract - "FK", "Vector", or "BOTH"

    Returns:
        "FK" or "Vector": The meta object or Unset if not found
        "BOTH": Tuple of (fk_meta_or_Unset, vector_meta_or_Unset)
    """
    fk: FKMeta | UnsetType = Unset
    vec: VectorMeta | UnsetType = Unset

    if _is_field_info(from_):
        if metas in ("FK", "BOTH"):
            fk = _find_in_field_info(from_, FKMeta) or Unset
        if metas in ("Vector", "BOTH"):
            vec = _find_in_field_info(from_, VectorMeta) or Unset

    elif get_origin(from_) is not None or isinstance(from_, type):
        # Raw type annotation
        if metas in ("FK", "BOTH"):
            fk = _find_in_annotation(from_, FKMeta) or Unset
        if metas in ("Vector", "BOTH"):
            vec = _find_in_annotation(from_, VectorMeta) or Unset

    else:
        # Try Spec (lazy import to avoid circular)
        from kronos.specs.spec import Spec

        if isinstance(from_, Spec):
            if metas in ("FK", "BOTH"):
                fk_val = from_.get("as_fk", Unset)
                if not_sentinel(fk_val, {"none"}) and isinstance(fk_val, FKMeta):
                    fk = fk_val
            if metas in ("Vector", "BOTH"):
                vec_val = from_.get("embedding", Unset)
                if not_sentinel(vec_val, {"none"}) and isinstance(vec_val, VectorMeta):
                    vec = vec_val
        else:
            raise TypeError(
                f"from_ must be FieldInfo, type annotation, or Spec, got {type(from_).__name__}"
            )

    if metas == "FK":
        return fk
    if metas == "Vector":
        return vec
    return (fk, vec)
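Below is a minimal usage sketch for the annotations defined above. The Tenant class and the example values are hypothetical; only FK, Vector, and extract_kron_db_meta come from kronos/types/db_types.py, and the expected results follow from the code shown.

from kronos.types.db_types import FK, Vector, extract_kron_db_meta


class Tenant:  # hypothetical referenced model
    pass


# FK[Tenant] expands to Annotated[UUID, FKMeta(Tenant)];
# Vector[1536] expands to Annotated[list[float], VectorMeta(1536)].
tenant_fk_annotation = FK[Tenant]
embedding_annotation = Vector[1536]

fk_meta, vec_meta = extract_kron_db_meta(tenant_fk_annotation)  # -> (FKMeta for Tenant, Unset)
vec_meta_only = extract_kron_db_meta(embedding_annotation, metas="Vector")  # -> VectorMeta(1536)
print(fk_meta, vec_meta_only)  # FK[Tenant] Vector[1536]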
kronos/types/identity.py
ADDED
@@ -0,0 +1,66 @@
# Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
# SPDX-License-Identifier: Apache-2.0

"""Semantic UUID typing for type-safe entity identification.

Provides ID[T] syntax for associating UUIDs with specific model types,
enabling compile-time type checking and runtime semantic clarity.

Usage:
    from kronos.types import ID

    user_id: ID[User] = uuid4()
    org_id: ID[Organization] = uuid4()

    # Type checker distinguishes these despite both being UUID at runtime
"""

from typing import Annotated
from uuid import UUID

__all__ = ("ID",)


class _IDMeta(type):
    """Metaclass enabling ID[T] syntax for semantic UUID typing.

    At runtime: ID[Model] returns Annotated[UUID, ("ID", Model)]
    For type checkers: Provides semantic distinction between UUID types.

    This allows code to express intent clearly:
        user_id: ID[User]      # A UUID that identifies a User
        tenant_id: ID[Tenant]  # A UUID that identifies a Tenant

    Both are UUIDs at runtime, but type checkers can distinguish them.
    """

    def __getitem__(cls, item: type) -> type:
        return Annotated[UUID, ("ID", item)]


class ID(UUID, metaclass=_IDMeta):
    """Semantic UUID type with model association.

    ID[T] creates a type annotation that:
    - At runtime: Is equivalent to UUID
    - For type checkers: Associates the UUID with model type T

    This enables self-documenting code and catches type mismatches:

        class User(Node): ...
        class Tenant(Node): ...

        def get_user(user_id: ID[User]) -> User: ...
        def get_tenant(tenant_id: ID[Tenant]) -> Tenant: ...

        uid: ID[User] = uuid4()
        tid: ID[Tenant] = uuid4()

        get_user(uid)  # OK
        get_user(tid)  # Type error: expected ID[User], got ID[Tenant]

    The semantic typing is purely for documentation and static analysis;
    at runtime, ID[User] and ID[Tenant] are both just UUIDs.
    """

    pass
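A short illustrative sketch of ID[T]; the User class is hypothetical, and the point is that ID[User] stays a plain UUID at runtime while remaining distinguishable to static analysis.

from uuid import uuid4

from kronos.types.identity import ID


class User:  # hypothetical model type, used only in the annotation
    pass


def get_user(user_id: ID[User]) -> None:
    # At runtime ID[User] is Annotated[UUID, ("ID", User)], so any UUID is accepted.
    print("looking up user", user_id)


get_user(uuid4())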
kronos/utils/__init__.py
ADDED
@@ -0,0 +1,40 @@
from ._hash import (
    GENESIS_HASH,
    MAX_HASH_INPUT_BYTES,
    HashAlgorithm,
    compute_chain_hash,
    compute_hash,
    hash_obj,
)
from ._json_dump import json_dump, json_dumpb, json_lines_iter
from ._to_list import to_list
from ._to_num import to_num
from ._utils import (
    async_synchronized,
    coerce_created_at,
    create_path,
    extract_types,
    get_bins,
    import_module,
    is_import_installed,
    load_type_from_string,
    now_utc,
    register_type_prefix,
    synchronized,
    to_uuid,
)
from .concurrency import alcall, is_coro_func
from .fuzzy import (
    SimilarityAlgo,
    extract_json,
    fuzzy_json,
    fuzzy_match_keys,
    string_similarity,
    to_dict,
)
from .sql._sql_validation import (
    MAX_IDENTIFIER_LENGTH,
    SAFE_IDENTIFIER_PATTERN,
    sanitize_order_by,
    validate_identifier,
)
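This __init__ flattens the utility modules into a single import surface. A small sketch, assuming the package is importable as kronos and relying on the hash_obj behavior defined in _hash.py below:

from kronos.utils import hash_obj

# hash_obj builds an order-independent representation, so dict key order does not matter.
assert hash_obj({"b": 2, "a": 1}) == hash_obj({"a": 1, "b": 2})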
kronos/utils/_hash.py
ADDED
@@ -0,0 +1,234 @@
# Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import contextlib
import copy
import hashlib
from collections.abc import Callable
from enum import Enum
from typing import TYPE_CHECKING, Any

from ._json_dump import json_dumpb
from ._lazy_init import LazyInit

if TYPE_CHECKING:
    from hashlib import _Hash

__all__ = (
    "GENESIS_HASH",
    "HashAlgorithm",
    "compute_chain_hash",
    "compute_hash",
    "hash_obj",
)

_lazy = LazyInit()
PydanticBaseModel = None


def _do_init() -> None:
    """Lazy-initialize Pydantic BaseModel reference."""
    global PydanticBaseModel
    from pydantic import BaseModel

    PydanticBaseModel = BaseModel


_PRIMITIVE_TYPES = (str, int, float, bool, type(None))
_TYPE_MARKER_DICT = 0
_TYPE_MARKER_LIST = 1
_TYPE_MARKER_TUPLE = 2
_TYPE_MARKER_SET = 3
_TYPE_MARKER_FROZENSET = 4
_TYPE_MARKER_PYDANTIC = 5


def _generate_hashable_representation(item: Any) -> Any:
    """Convert object to stable, order-independent hashable representation.

    Recursively transforms dicts/sets into sorted tuples with type markers
    to ensure consistent hashing regardless of insertion order.
    """
    if isinstance(item, _PRIMITIVE_TYPES):
        return item

    if PydanticBaseModel and isinstance(item, PydanticBaseModel):
        return (
            _TYPE_MARKER_PYDANTIC,
            _generate_hashable_representation(item.model_dump()),
        )

    if isinstance(item, dict):
        return (
            _TYPE_MARKER_DICT,
            tuple(
                (str(k), _generate_hashable_representation(v))
                for k, v in sorted(item.items(), key=lambda x: str(x[0]))
            ),
        )

    if isinstance(item, list):
        return (
            _TYPE_MARKER_LIST,
            tuple(_generate_hashable_representation(elem) for elem in item),
        )

    if isinstance(item, tuple):
        return (
            _TYPE_MARKER_TUPLE,
            tuple(_generate_hashable_representation(elem) for elem in item),
        )

    if isinstance(item, frozenset):
        try:
            sorted_elements = sorted(list(item))
        except TypeError:
            sorted_elements = sorted(list(item), key=lambda x: (str(type(x)), str(x)))
        return (
            _TYPE_MARKER_FROZENSET,
            tuple(_generate_hashable_representation(elem) for elem in sorted_elements),
        )

    if isinstance(item, set):
        try:
            sorted_elements = sorted(list(item))
        except TypeError:
            sorted_elements = sorted(list(item), key=lambda x: (str(type(x)), str(x)))
        return (
            _TYPE_MARKER_SET,
            tuple(_generate_hashable_representation(elem) for elem in sorted_elements),
        )

    with contextlib.suppress(Exception):
        return str(item)
    with contextlib.suppress(Exception):
        return repr(item)

    return f"<unhashable:{type(item).__name__}:{id(item)}>"


def hash_obj(data: Any, strict: bool = False) -> int:
    """Generate stable int hash for Python __hash__() protocol.

    Use for: set/dict membership, deduplication, __hash__ implementations.
    NOT for: cryptographic integrity, content verification (use compute_hash).

    Args:
        data: Any data structure (dicts, lists, Pydantic models, nested).
        strict: Deep-copy data before hashing to prevent mutation effects.

    Returns:
        Stable int hash suitable for hash-based collections.

    Raises:
        TypeError: If generated representation is not hashable.
    """
    _lazy.ensure(_do_init)

    data_to_process = data
    if strict:
        data_to_process = copy.deepcopy(data)

    hashable_repr = _generate_hashable_representation(data_to_process)

    try:
        return hash(hashable_repr)
    except TypeError as e:
        raise TypeError(
            f"The generated representation for the input data was not hashable. "
            f"Input type: {type(data).__name__}, Representation type: {type(hashable_repr).__name__}. "
            f"Original error: {e}"
        )


MAX_HASH_INPUT_BYTES = 10 * 1024 * 1024
"""Max hash input (10MB). SOC2 CC7.1 DoS prevention."""

GENESIS_HASH: str = "GENESIS"
"""Sentinel for first entry in hash chain."""


class HashAlgorithm(Enum):
    """NIST FIPS 180-4 compliant hash algorithms for cryptographic integrity."""

    SHA256 = "sha256"
    SHA384 = "sha384"
    SHA512 = "sha512"

    def get_hasher(self) -> Callable[..., _Hash]:
        """Return hashlib constructor for this algorithm."""
        return {
            HashAlgorithm.SHA256: hashlib.sha256,
            HashAlgorithm.SHA384: hashlib.sha384,
            HashAlgorithm.SHA512: hashlib.sha512,
        }[self]

    @property
    def digest_size(self) -> int:
        """Digest size in bytes (32/48/64 for SHA256/384/512)."""
        return {
            HashAlgorithm.SHA256: 32,
            HashAlgorithm.SHA384: 48,
            HashAlgorithm.SHA512: 64,
        }[self]


def compute_hash(
    obj: dict | str | bytes | Any,
    algorithm: HashAlgorithm = HashAlgorithm.SHA256,
    none_as_valid: bool = False,
) -> str:
    """Compute cryptographic hash for content integrity verification.

    Use for: content integrity, tamper detection, evidence chains.
    NOT for: __hash__ protocol, set/dict membership (use hash_obj).

    Args:
        obj: Data to hash (dict, str, bytes, or JSON-serializable).
        algorithm: Hash algorithm (default SHA-256).
        none_as_valid: Treat None as valid input (hashes as "null").

    Returns:
        Hex-encoded hash digest string.

    Raises:
        ValueError: If payload exceeds MAX_HASH_INPUT_BYTES (10MB).
    """
    payload: bytes
    if none_as_valid and obj is None:
        payload = b"null"
    elif isinstance(obj, bytes):
        payload = obj
    elif isinstance(obj, str):
        payload = obj.encode()
    else:
        payload = json_dumpb(obj, sort_keys=True, deterministic_sets=True)

    if len(payload) > MAX_HASH_INPUT_BYTES:
        raise ValueError(f"Payload {len(payload):,}B > {MAX_HASH_INPUT_BYTES:,}B limit")

    hasher = algorithm.get_hasher()
    return hasher(payload).hexdigest()


def compute_chain_hash(
    payload_hash: str,
    previous_hash: str | None,
    algorithm: HashAlgorithm = HashAlgorithm.SHA256,
) -> str:
    """Compute chain hash linking current entry to previous.

    Formula: HASH("{payload_hash}:{previous_hash or 'GENESIS'}")

    Args:
        payload_hash: Hash of current entry's payload.
        previous_hash: Hash of previous entry (None for genesis entry).
        algorithm: Hash algorithm to use.

    Returns:
        Hex-encoded chain hash for tamper-evident linking.
    """
    chain_input = f"{payload_hash}:{previous_hash or GENESIS_HASH}"
    return compute_hash(chain_input, algorithm)
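A minimal sketch of the hash-chain helpers above; the two event payloads are arbitrary example data, and the chaining formula is the one documented in compute_chain_hash.

from kronos.utils._hash import compute_chain_hash, compute_hash

# Hash each entry's payload, then link entries into a tamper-evident chain.
h1 = compute_hash({"event": "created", "seq": 1})
c1 = compute_chain_hash(h1, previous_hash=None)  # genesis entry links to "GENESIS"

h2 = compute_hash({"event": "updated", "seq": 2})
c2 = compute_chain_hash(h2, previous_hash=c1)    # later entries link to the prior chain hash

# Verification recomputes the chain from the payload hashes and compares.
assert c1 == compute_chain_hash(h1, None)
assert c2 == compute_chain_hash(h2, c1)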