krons 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. kronos/__init__.py +0 -0
  2. kronos/core/__init__.py +145 -0
  3. kronos/core/broadcaster.py +116 -0
  4. kronos/core/element.py +225 -0
  5. kronos/core/event.py +316 -0
  6. kronos/core/eventbus.py +116 -0
  7. kronos/core/flow.py +356 -0
  8. kronos/core/graph.py +442 -0
  9. kronos/core/node.py +982 -0
  10. kronos/core/pile.py +575 -0
  11. kronos/core/processor.py +494 -0
  12. kronos/core/progression.py +296 -0
  13. kronos/enforcement/__init__.py +57 -0
  14. kronos/enforcement/common/__init__.py +34 -0
  15. kronos/enforcement/common/boolean.py +85 -0
  16. kronos/enforcement/common/choice.py +97 -0
  17. kronos/enforcement/common/mapping.py +118 -0
  18. kronos/enforcement/common/model.py +102 -0
  19. kronos/enforcement/common/number.py +98 -0
  20. kronos/enforcement/common/string.py +140 -0
  21. kronos/enforcement/context.py +129 -0
  22. kronos/enforcement/policy.py +80 -0
  23. kronos/enforcement/registry.py +153 -0
  24. kronos/enforcement/rule.py +312 -0
  25. kronos/enforcement/service.py +370 -0
  26. kronos/enforcement/validator.py +198 -0
  27. kronos/errors.py +146 -0
  28. kronos/operations/__init__.py +32 -0
  29. kronos/operations/builder.py +228 -0
  30. kronos/operations/flow.py +398 -0
  31. kronos/operations/node.py +101 -0
  32. kronos/operations/registry.py +92 -0
  33. kronos/protocols.py +414 -0
  34. kronos/py.typed +0 -0
  35. kronos/services/__init__.py +81 -0
  36. kronos/services/backend.py +286 -0
  37. kronos/services/endpoint.py +608 -0
  38. kronos/services/hook.py +471 -0
  39. kronos/services/imodel.py +465 -0
  40. kronos/services/registry.py +115 -0
  41. kronos/services/utilities/__init__.py +36 -0
  42. kronos/services/utilities/header_factory.py +87 -0
  43. kronos/services/utilities/rate_limited_executor.py +271 -0
  44. kronos/services/utilities/rate_limiter.py +180 -0
  45. kronos/services/utilities/resilience.py +414 -0
  46. kronos/session/__init__.py +41 -0
  47. kronos/session/exchange.py +258 -0
  48. kronos/session/message.py +60 -0
  49. kronos/session/session.py +411 -0
  50. kronos/specs/__init__.py +25 -0
  51. kronos/specs/adapters/__init__.py +0 -0
  52. kronos/specs/adapters/_utils.py +45 -0
  53. kronos/specs/adapters/dataclass_field.py +246 -0
  54. kronos/specs/adapters/factory.py +56 -0
  55. kronos/specs/adapters/pydantic_adapter.py +309 -0
  56. kronos/specs/adapters/sql_ddl.py +946 -0
  57. kronos/specs/catalog/__init__.py +36 -0
  58. kronos/specs/catalog/_audit.py +39 -0
  59. kronos/specs/catalog/_common.py +43 -0
  60. kronos/specs/catalog/_content.py +59 -0
  61. kronos/specs/catalog/_enforcement.py +70 -0
  62. kronos/specs/factory.py +120 -0
  63. kronos/specs/operable.py +314 -0
  64. kronos/specs/phrase.py +405 -0
  65. kronos/specs/protocol.py +140 -0
  66. kronos/specs/spec.py +506 -0
  67. kronos/types/__init__.py +60 -0
  68. kronos/types/_sentinel.py +311 -0
  69. kronos/types/base.py +369 -0
  70. kronos/types/db_types.py +260 -0
  71. kronos/types/identity.py +66 -0
  72. kronos/utils/__init__.py +40 -0
  73. kronos/utils/_hash.py +234 -0
  74. kronos/utils/_json_dump.py +392 -0
  75. kronos/utils/_lazy_init.py +63 -0
  76. kronos/utils/_to_list.py +165 -0
  77. kronos/utils/_to_num.py +85 -0
  78. kronos/utils/_utils.py +375 -0
  79. kronos/utils/concurrency/__init__.py +205 -0
  80. kronos/utils/concurrency/_async_call.py +333 -0
  81. kronos/utils/concurrency/_cancel.py +122 -0
  82. kronos/utils/concurrency/_errors.py +96 -0
  83. kronos/utils/concurrency/_patterns.py +363 -0
  84. kronos/utils/concurrency/_primitives.py +328 -0
  85. kronos/utils/concurrency/_priority_queue.py +135 -0
  86. kronos/utils/concurrency/_resource_tracker.py +110 -0
  87. kronos/utils/concurrency/_run_async.py +67 -0
  88. kronos/utils/concurrency/_task.py +95 -0
  89. kronos/utils/concurrency/_utils.py +79 -0
  90. kronos/utils/fuzzy/__init__.py +14 -0
  91. kronos/utils/fuzzy/_extract_json.py +90 -0
  92. kronos/utils/fuzzy/_fuzzy_json.py +288 -0
  93. kronos/utils/fuzzy/_fuzzy_match.py +149 -0
  94. kronos/utils/fuzzy/_string_similarity.py +187 -0
  95. kronos/utils/fuzzy/_to_dict.py +396 -0
  96. kronos/utils/sql/__init__.py +13 -0
  97. kronos/utils/sql/_sql_validation.py +142 -0
  98. krons-0.1.0.dist-info/METADATA +70 -0
  99. krons-0.1.0.dist-info/RECORD +101 -0
  100. krons-0.1.0.dist-info/WHEEL +4 -0
  101. krons-0.1.0.dist-info/licenses/LICENSE +201 -0
kronos/core/node.py ADDED
@@ -0,0 +1,982 @@
1
+ # Copyright (c) 2025 - 2026, HaiyangLi <quantocean.li at gmail dot com>
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Node: Persistable element with structured content and polymorphic serialization.
5
+
6
+ Provides Node (extends Element), NodeConfig, create_node factory, and DDL generation.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Callable
12
+ from dataclasses import dataclass, field
13
+ from typing import Any, ClassVar, Literal
14
+ from uuid import UUID
15
+
16
+ from pydantic import BaseModel, field_serializer, field_validator
17
+
18
+ from kronos.protocols import Deserializable, Serializable, implements
19
+ from kronos.types import (
20
+ ModelConfig,
21
+ Params,
22
+ Unset,
23
+ UnsetType,
24
+ is_sentinel,
25
+ is_unset,
26
+ not_sentinel,
27
+ )
28
+ from kronos.types.db_types import VectorMeta, extract_kron_db_meta
29
+ from kronos.utils import compute_hash, json_dump, now_utc
30
+
31
+ from .element import Element
32
+
33
# --- Registries ---
# NODE_REGISTRY maps a class name (fully qualified or short) to the Node
# class used for polymorphic lookup.
# PERSISTABLE_NODE_REGISTRY maps a DB table name to the DB-bound Node class
# that owns it (used for DDL generation).

NODE_REGISTRY: dict[str, type[Node]] = dict()
PERSISTABLE_NODE_REGISTRY: dict[str, type[Node]] = dict()
39
+
40
+
41
def _register_persistable(table_name: str, cls: type[Node]) -> None:
    """Bind ``table_name`` to ``cls`` in PERSISTABLE_NODE_REGISTRY.

    Registering the same class for the same table again is a no-op.

    Args:
        table_name: DB table name used as the registry key.
        cls: Node class that owns the table.

    Raises:
        ValueError: If the table is already bound to a different class.
    """
    try:
        owner = PERSISTABLE_NODE_REGISTRY[table_name]
    except KeyError:
        # First registration for this table.
        PERSISTABLE_NODE_REGISTRY[table_name] = cls
        return

    if owner is cls:
        # Same class registered twice: nothing to do.
        return

    raise ValueError(
        f"Table '{table_name}' already registered by "
        f"{owner.__module__}.{owner.__name__}, "
        f"cannot register {cls.__module__}.{cls.__name__}"
    )
53
+
54
+
55
+ def _enable_embedding_requires_dim(config: NodeConfig) -> None:
56
+ """Validate: embedding_enabled requires positive embedding_dim."""
57
+ if config.embedding_enabled:
58
+ if config.is_sentinel_field("embedding_dim"):
59
+ raise ValueError("embedding_dim must be specified when embedding is enabled")
60
+ if config.embedding_dim <= 0:
61
+ raise ValueError(f"embedding_dim must be positive, got {config.embedding_dim}")
62
+
63
+
64
+ def _only_typed_content_can_flatten(config: NodeConfig) -> None:
65
+ """Validate: flatten_content requires explicit content_type."""
66
+ if config.flatten_content and config.is_sentinel_field("content_type"):
67
+ raise ValueError("content_type must be specified when flatten_content is True")
68
+
69
+
70
@dataclass(frozen=True, slots=True, init=False)
class NodeConfig(Params):
    """Frozen settings describing how a Node is persisted and audited.

    Supply it through create_node() or assign it to ``node_config`` on a
    Node subclass.

    Field groups:
        DB mapping:   table_name, schema, meta_key
        Embedding:    embedding_enabled, embedding_dim, embedding_format
        Time:         time_format, timezone
        Polymorphism: polymorphic, registry_key
        Content:      flatten_content, content_frozen, content_nullable,
                      content_type
        Audit:        content_hashing, integrity_hashing, soft_delete,
                      versioning, track_*

    Validation rules (checked in __post_init__):
        - embedding_enabled=True needs a positive embedding_dim
        - flatten_content=True needs an explicit content_type

    Usage:
        # Via create_node (preferred)
        Job = create_node("Job", table_name="jobs", soft_delete=True)

        # Via class attribute (advanced)
        class Job(Node):
            node_config = NodeConfig(table_name="jobs", soft_delete=True)
    """

    _config: ClassVar[ModelConfig] = ModelConfig(
        sentinel_additions=frozenset({"none", "empty"}),
        prefill_unset=False,
    )

    # --- DB mapping ---
    table_name: str | UnsetType = Unset
    schema: str = "public"
    meta_key: str = "node_metadata"

    # --- Embedding ---
    embedding_enabled: bool = False
    embedding_dim: int | UnsetType = Unset
    embedding_format: Literal["pgvector", "jsonb", "list"] = "pgvector"

    # --- Time ---
    time_format: Literal["datetime", "isoformat", "timestamp"] = "isoformat"
    timezone: str = "UTC"

    # --- Polymorphism ---
    polymorphic: bool = False
    registry_key: str | UnsetType = Unset

    # --- Content ---
    flatten_content: bool = False
    content_frozen: bool = False
    content_nullable: bool = False
    content_type: type | UnsetType = Unset

    # --- Audit & lifecycle ---
    content_hashing: bool = False
    integrity_hashing: bool = False
    soft_delete: bool = False
    track_deleted_by: bool = False
    track_is_active: bool = False
    versioning: bool = False
    track_updated_at: bool = False
    track_updated_by: bool = False

    # --- Additional ---
    db_extra: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Apply the cross-field validation rules."""
        _enable_embedding_requires_dim(self)
        _only_typed_content_can_flatten(self)

    @property
    def is_persisted(self) -> bool:
        """Whether table_name holds a real value (i.e. is not a sentinel)."""
        table_missing = self.is_sentinel_field("table_name")
        return not table_missing

    @property
    def has_audit_fields(self) -> bool:
        """Whether hashing, soft delete, versioning or updated_at tracking is on.

        NOTE(review): track_updated_by, track_deleted_by and track_is_active
        are not consulted here - confirm that is intended.
        """
        hashing = self.content_hashing or self.integrity_hashing
        lifecycle = self.soft_delete or self.versioning or self.track_updated_at
        return hashing or lifecycle
160
+
161
+
162
@implements(
    Deserializable,
    Serializable,
)
class Node(Element):
    """Persistable element with structured content and polymorphic serialization.

    Extends Element with:
        - NodeConfig: DB persistence, audit trail, embedding support
        - content: Typed field (BaseModel, Serializable, dict, or None)
        - Polymorphic from_dict/to_dict via NODE_REGISTRY lookup

    Class Attributes:
        node_config: NodeConfig instance (None = default config)
        content: Structured payload (validated, serializable)

    Lifecycle Methods (config-dependent):
        touch(by): Update timestamps, version, rehash
        soft_delete(by): Mark deleted (reversible)
        restore(by): Undelete
        activate(by): Mark active (requires track_is_active)
        deactivate(by): Mark inactive (requires track_is_active)
        rehash(): Recompute content_hash

    See Also:
        create_node(): Factory for Node subclasses with enforced config
        generate_ddl(): Generate CREATE TABLE from Node class

    """

    node_config: ClassVar[NodeConfig | None] = None
    content: dict[str, Any] | Serializable | BaseModel | UnsetType | None = None

    _resolved_content_type: ClassVar[type | None] = None

    @classmethod
    def get_config(cls) -> NodeConfig:
        """Return node_config, or a default NodeConfig when none is set."""
        if cls.node_config is None:
            return NodeConfig()
        return cls.node_config

    @classmethod
    def __pydantic_init_subclass__(cls, **kwargs: Any) -> None:
        """Auto-register in NODE_REGISTRY and PERSISTABLE_NODE_REGISTRY."""
        super().__pydantic_init_subclass__(**kwargs)

        config = cls.get_config()

        # Polymorphic lookup: keyed by explicit registry_key or full class name.
        if config.polymorphic:
            registry_key = (
                cls.class_name(full=True)
                if config.is_sentinel_field("registry_key")
                else config.registry_key
            )
            NODE_REGISTRY[registry_key] = cls

        # DB persistence: keyed by table name.
        if config.is_persisted:
            _register_persistable(config.table_name, cls)

        # Remember the content type on the class (the config is frozen).
        # An explicit config.content_type wins; otherwise fall back to the
        # annotation of the "content" field when the class declares one.
        if not config.is_sentinel_field("content_type"):
            cls._resolved_content_type = config.content_type
        elif "content" in cls.model_fields:
            cls._resolved_content_type = cls.model_fields["content"].annotation
        else:
            cls._resolved_content_type = None

    @classmethod
    def _flatten_model(cls) -> type[BaseModel] | None:
        """Return the BaseModel content type usable for flattening, else None.

        Uses config.content_type when set, otherwise the content type
        resolved at class creation.
        """
        config = cls.get_config()
        content_type = (
            cls._resolved_content_type
            if config.is_sentinel_field("content_type")
            else config.content_type
        )
        if isinstance(content_type, type) and issubclass(content_type, BaseModel):
            return content_type
        return None

    @staticmethod
    def _exclude_content(kwargs: dict[str, Any]) -> None:
        """Add "content" to the ``exclude`` entry of ``kwargs`` in place.

        Dict and set/frozenset excludes keep their existing entries; any
        other value (including a missing one) is replaced by {"content"}.
        """
        exclude = kwargs.get("exclude")
        if isinstance(exclude, dict):
            kwargs["exclude"] = {**exclude, "content": True}
        elif isinstance(exclude, (set, frozenset)):
            kwargs["exclude"] = set(exclude) | {"content"}
        else:
            kwargs["exclude"] = {"content"}

    @field_serializer("content")
    def _serialize_content(self, value: Any) -> Any:
        """Serialize content via json_dump. None and sentinels pass through."""
        if value is None:
            return None
        if is_sentinel(value):
            return Unset
        return json_dump(value, decode=True, as_loaded=True)

    @field_validator("content", mode="before")
    @classmethod
    def _validate_content(cls, value: Any) -> Any:
        """Validate content type and handle polymorphic deserialization.

        Raises:
            TypeError: If value is not a sentinel, None, Serializable,
                BaseModel or dict.
        """
        if is_sentinel(value):
            return value

        if value is not None and not isinstance(value, (Serializable, BaseModel, dict)):
            raise TypeError(
                f"content must be Serializable, BaseModel, dict, or None. "
                f"Got {type(value).__name__}. "
                f"Use dict for unstructured data: content={{'value': {value!r}}} "
                f"or Element.metadata for simple key-value pairs."
            )

        # Polymorphic: restore the element type from metadata["kron_class"].
        # Content dicts are arbitrary user data, so "metadata" may hold
        # anything; only a dict carrying a string kron_class triggers this.
        if isinstance(value, dict):
            metadata = value.get("metadata")
            kron_class = metadata.get("kron_class") if isinstance(metadata, dict) else None
            if isinstance(kron_class, str) and kron_class:
                short_name = kron_class.split(".")[-1]
                if kron_class in NODE_REGISTRY or short_name in NODE_REGISTRY:
                    return Node.from_dict(value)
                return Element.from_dict(value)
        return value

    def to_dict(
        self,
        mode: Literal["python", "json", "db"] = "python",
        created_at_format: (Literal["datetime", "isoformat", "timestamp"] | UnsetType) = Unset,
        meta_key: str | UnsetType = Unset,
        content_serializer: Callable[[Any], Any] | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """Serialize to dict with optional custom content handling.

        Args:
            mode: "python" (native types), "json" (JSON-safe), "db" (DB-ready)
            created_at_format: Override time format for created_at
            meta_key: Rename metadata field (e.g., "node_metadata" for DB)
            content_serializer: Custom serializer for content field
            **kwargs: Passed to Element.to_dict()

        Returns:
            Serialized dict. If content_serializer is provided, content is
            excluded from the base dump and replaced with serializer output.

        Raises:
            TypeError: If content_serializer is given but not callable.

        Notes:
            When mode="db", config.flatten_content=True and the content is a
            BaseModel instance with a BaseModel content type, the content
            fields are spread into the result dict (no "content" key) and
            content_serializer is not used.

        """
        config = self.get_config()

        # Flatten only when the declared type AND the actual value are
        # BaseModels; None, sentinels and plain dicts have no model_dump().
        can_flatten = (
            config.flatten_content
            and self._flatten_model() is not None
            and isinstance(self.content, BaseModel)
        )

        if mode == "db" and can_flatten:
            self._exclude_content(kwargs)

            # NOTE(review): config.meta_key is applied only on this flattened
            # path; the other paths pass meta_key through unchanged - confirm.
            effective_meta_key = meta_key if not is_unset(meta_key) else config.meta_key

            result = super().to_dict(
                mode=mode,
                created_at_format=created_at_format,
                meta_key=effective_meta_key,
                **kwargs,
            )
            # Spread the content fields into the row.
            result.update(self.content.model_dump(mode="json"))
            return result

        if content_serializer is not None:
            if not callable(content_serializer):
                typ = type(content_serializer).__name__
                raise TypeError(f"content_serializer must be callable, got {typ}")

            self._exclude_content(kwargs)
            result = super().to_dict(
                mode=mode,
                created_at_format=created_at_format,
                meta_key=meta_key,
                **kwargs,
            )
            result["content"] = content_serializer(self.content)
            return result

        # Default: delegate to Element.to_dict.
        return super().to_dict(
            mode=mode,
            created_at_format=created_at_format,
            meta_key=meta_key,
            **kwargs,
        )

    @classmethod
    def from_dict(
        cls,
        data: dict[str, Any],
        meta_key: str | UnsetType = Unset,
        content_deserializer: Callable[[Any], Any] | None = None,
        from_row: bool = False,
        **kwargs: Any,
    ) -> Node:
        """Deserialize dict to Node with polymorphic type restoration.

        Looks up kron_class in metadata to restore the original Node subclass.
        Handles the legacy "node_metadata" key and custom meta_key mapping.

        Args:
            data: Dict from to_dict() or DB row (not mutated)
            meta_key: Custom metadata field name to restore
            content_deserializer: Transform content before validation
            from_row: If True and config.flatten_content, extract content fields
                from flattened row data (inverse of to_dict(mode="db"))
            **kwargs: Passed to model_validate()

        Returns:
            Node instance (or appropriate subclass via NODE_REGISTRY lookup)

        Raises:
            TypeError: If content_deserializer is given but not callable.
            ValueError: If content_deserializer raises.

        """
        data = data.copy()
        config = cls.get_config()

        # Flattened DB row: pull the content fields out and rebuild content.
        if from_row and config.flatten_content and "content" not in data:
            flatten_model = cls._flatten_model()
            if flatten_model is not None:
                content_data = {
                    name: data.pop(name) for name in flatten_model.model_fields if name in data
                }
                data["content"] = flatten_model(**content_data)

        # DB rows default to the configured metadata column name.
        effective_meta_key = (
            meta_key if not is_unset(meta_key) else (config.meta_key if from_row else Unset)
        )

        deserializer_applied = False
        if content_deserializer is not None:
            if not callable(content_deserializer):
                typ = type(content_deserializer).__name__
                raise TypeError(f"content_deserializer must be callable, got {typ}")
            if "content" in data:
                try:
                    data["content"] = content_deserializer(data["content"])
                except Exception as e:
                    raise ValueError(f"content_deserializer failed: {e}") from e
                deserializer_applied = True

        # Restore metadata from custom key (meta_key or legacy "node_metadata").
        if not is_unset(effective_meta_key) and effective_meta_key in data:
            data["metadata"] = data.pop(effective_meta_key)
        elif "node_metadata" in data and "metadata" not in data:
            data["metadata"] = data.pop("node_metadata")
        data.pop("node_metadata", None)

        # Extract kron_class for polymorphic dispatch (removed from a copy of
        # the metadata so the caller's dict is untouched).
        metadata = data.get("metadata", {})
        if isinstance(metadata, dict):
            metadata = metadata.copy()
            data["metadata"] = metadata
            kron_class = metadata.pop("kron_class", None)
        else:
            kron_class = None

        if isinstance(kron_class, str) and kron_class and kron_class != cls.class_name(full=True):
            target_cls = NODE_REGISTRY.get(kron_class) or NODE_REGISTRY.get(
                kron_class.split(".")[-1]
            )
            if target_cls is not None and target_cls is not cls:
                return target_cls.from_dict(
                    data,
                    # Content already transformed above must not be
                    # deserialized a second time by the target class.
                    content_deserializer=None if deserializer_applied else content_deserializer,
                    from_row=from_row,
                    **kwargs,
                )

        return cls.model_validate(data, **kwargs)

    # --- Audit & Lifecycle ---

    def _has_field(self, name: str) -> bool:
        """Check if name is a declared model field (not property/method)."""
        return name in self.__class__.model_fields

    def rehash(self) -> str | None:
        """Recompute and store content_hash. Returns hash or None if disabled."""
        config = self.get_config()
        if not config.content_hashing:
            return None

        new_hash = compute_hash(self.content, none_as_valid=True)

        # Store in the declared field if there is one, otherwise in metadata.
        if self._has_field("content_hash"):
            self.content_hash = new_hash
        else:
            self.metadata["content_hash"] = new_hash

        return new_hash

    def update_integrity_hash(self, previous_hash: str | None = None) -> str | None:
        """Compute chain hash for tamper-evident audit trail.

        Args:
            previous_hash: Previous entry's hash (None for genesis/first entry)

        Returns:
            Computed integrity_hash, or None if integrity_hashing disabled

        """
        from kronos.utils import compute_chain_hash

        config = self.get_config()
        if not config.integrity_hashing:
            return None

        # Reuse a stored content_hash (field first, then metadata); compute
        # one on the fly when none is available.
        content_hash = None
        if self._has_field("content_hash"):
            content_hash = self.content_hash
        elif "content_hash" in self.metadata:
            content_hash = self.metadata.get("content_hash")
        if content_hash is None:
            content_hash = compute_hash(self.content, none_as_valid=True)

        new_integrity_hash = compute_chain_hash(content_hash, previous_hash)

        if self._has_field("integrity_hash"):
            self.integrity_hash = new_integrity_hash
        else:
            self.metadata["integrity_hash"] = new_integrity_hash

        return new_integrity_hash

    def touch(self, by: UUID | str | None = None) -> None:
        """Update timestamps, increment version, and rehash (per config).

        Args:
            by: Actor identifier for updated_by field

        """
        config = self.get_config()

        if config.track_updated_at and self._has_field("updated_at"):
            self.updated_at = now_utc()
        if by is not None and self._has_field("updated_by"):
            self.updated_by = str(by)
        if config.versioning and self._has_field("version"):
            self.version += 1
        if config.content_hashing:
            self.rehash()

    def soft_delete(self, by: UUID | str | None = None) -> None:
        """Mark as deleted (reversible). Requires soft_delete=True in config.

        Args:
            by: Actor identifier for deleted_by field

        Raises:
            RuntimeError: If soft_delete not enabled

        """
        config = self.get_config()
        if not config.soft_delete:
            raise RuntimeError(
                f"{self.__class__.__name__} does not support soft_delete. "
                f"Enable with create_node(..., soft_delete=True)"
            )

        if self._has_field("deleted_at"):
            self.deleted_at = now_utc()
        if self._has_field("is_deleted"):
            self.is_deleted = True
        if by is not None and self._has_field("deleted_by"):
            self.deleted_by = str(by)

        self.touch(by)

    def restore(self, by: UUID | str | None = None) -> None:
        """Undelete a soft-deleted node. Requires soft_delete=True in config.

        Args:
            by: Actor identifier for updated_by (deleted_by is cleared)

        Raises:
            RuntimeError: If soft_delete not enabled

        """
        config = self.get_config()
        if not config.soft_delete:
            raise RuntimeError(
                f"{self.__class__.__name__} does not support restore. "
                f"Enable with create_node(..., soft_delete=True)"
            )

        if self._has_field("deleted_at"):
            self.deleted_at = None
        if self._has_field("is_deleted"):
            self.is_deleted = False
        if self._has_field("deleted_by"):
            self.deleted_by = None  # Clear who deleted on restore

        self.touch(by)

    def activate(self, by: UUID | str | None = None) -> None:
        """Mark as active. Requires track_is_active=True in config.

        Args:
            by: Actor identifier for updated_by field

        Raises:
            RuntimeError: If track_is_active not enabled

        """
        config = self.get_config()
        if not config.track_is_active:
            raise RuntimeError(
                f"{self.__class__.__name__} does not support activate. "
                f"Enable with create_node(..., track_is_active=True)"
            )
        if self._has_field("is_active"):
            self.is_active = True
        self.touch(by)

    def deactivate(self, by: UUID | str | None = None) -> None:
        """Mark as inactive. Requires track_is_active=True in config.

        Args:
            by: Actor identifier for updated_by field

        Raises:
            RuntimeError: If track_is_active not enabled

        """
        config = self.get_config()
        if not config.track_is_active:
            raise RuntimeError(
                f"{self.__class__.__name__} does not support deactivate. "
                f"Enable with create_node(..., track_is_active=True)"
            )
        if self._has_field("is_active"):
            self.is_active = False
        self.touch(by)
639
+
640
+
641
# Make the base Node class resolvable by both its short and its full name.
NODE_REGISTRY.update(
    {
        Node.__name__: Node,
        Node.class_name(full=True): Node,
    }
)
643
+
644
+
645
+ # --- Node Factory ---
646
+
647
+
648
def create_node(
    name: str,
    *,
    content: type[BaseModel] | None = None,
    embedding: Any | None = None,  # Vector[dim] annotation
    embedding_enabled: bool = False,  # Alternative: enable with dim
    embedding_dim: int | None = None,  # Alternative: specify dimension
    table_name: str | None = None,
    schema: str = "public",
    flatten_content: bool = True,
    immutable: bool = False,
    # Audit & lifecycle options
    content_hashing: bool = False,
    integrity_hashing: bool = False,
    soft_delete: bool = False,
    track_deleted_by: bool = False,
    track_is_active: bool = False,
    versioning: bool = False,
    track_updated_at: bool = True,
    track_updated_by: bool = True,
    doc: str | None = None,
    **config_kwargs: Any,
) -> type[Node]:
    """Create Node subclass with typed content, embedding, and audit fields.

    Factory ensures NodeConfig validation at class creation. Fields are
    generated from Spec catalog, not just configured.

    Args:
        name: Class name (e.g., "Job", "Evidence")
        content: BaseModel for typed content (FK[Model] preserved for DDL)
        embedding: Vector[dim] annotation (adds embedding: list[float] | None)
        embedding_enabled: Alternative to embedding - enable with explicit dim
        embedding_dim: Dimension when using embedding_enabled=True
        table_name: DB table name (registers in PERSISTABLE_NODE_REGISTRY)
        schema: DB schema (default: "public")
        flatten_content: Flatten content fields in DDL (default: True).
            Ignored when no content model is given.
        immutable: Freeze content (append-only pattern)
        content_hashing: SHA-256 hash on content changes
        integrity_hashing: Chain hash for audit trail
        soft_delete: Enable soft_delete()/restore() methods
        track_deleted_by: Track deleted_by (requires soft_delete)
        track_is_active: Add is_active field with activate()/deactivate()
        versioning: Track version number
        track_updated_at: Add updated_at timestamp (default: True)
        track_updated_by: Track updated_by actor (default: True)
        doc: Docstring passed to the composed class
        **config_kwargs: Additional NodeConfig parameters

    Returns:
        Node subclass with configured fields and lifecycle methods.

    Raises:
        ValueError: If embedding is not a Vector[dim] annotation, if
            embedding_enabled is set without a positive embedding_dim, or
            if table_name is already registered by another class.

    Example:
        >>> # Option 1: Vector annotation
        >>> Job = create_node("Job", embedding=Vector[1536])
        >>>
        >>> # Option 2: Explicit enable + dim (preferred for tests)
        >>> Job = create_node("Job", embedding_enabled=True, embedding_dim=1536)

    """
    from kronos.specs.catalog import AuditSpecs, ContentSpecs
    from kronos.specs.operable import Operable

    # Resolve embedding dimension
    resolved_embedding_dim: int | UnsetType = Unset
    has_embedding = False

    if embedding is not None:
        vec_meta = extract_kron_db_meta(embedding, metas="Vector")
        if not isinstance(vec_meta, VectorMeta):
            raise ValueError(
                f"embedding must be Vector[dim] annotation, got {embedding}. "
                f"Use: embedding=Vector[1536]"
            )
        resolved_embedding_dim = vec_meta.dim
        has_embedding = True
    elif embedding_enabled:
        if embedding_dim is None or embedding_dim <= 0:
            raise ValueError("embedding_dim must be positive when embedding_enabled=True")
        resolved_embedding_dim = embedding_dim
        has_embedding = True

    content_type: type[BaseModel] | UnsetType = content if content else Unset

    # 1. Build all possible specs
    all_specs = ContentSpecs.get_specs(
        content_type=content_type,
        dim=resolved_embedding_dim,
    ) + AuditSpecs.get_specs(use_uuid=True)

    # 2. Track which fields to include
    include: list[str] = ["id", "created_at"]

    if content is not None:
        include.append("content")
    if has_embedding:
        include.append("embedding")

    # Any audit feature implies an updated_at column, even when
    # track_updated_at itself is off.
    needs_update_tracking = (
        track_updated_at or content_hashing or integrity_hashing or soft_delete or versioning
    )
    if needs_update_tracking:
        include.append("updated_at")
    if track_updated_by:
        include.append("updated_by")
    if content_hashing:
        include.append("content_hash")
    if integrity_hashing:
        include.append("integrity_hash")
    if soft_delete:
        include.extend(["is_deleted", "deleted_at"])
    if track_deleted_by:
        include.append("deleted_by")
    if versioning:
        include.append("version")
    if track_is_active:
        include.append("is_active")

    # 3. Build config
    node_config = NodeConfig(
        table_name=table_name if table_name else Unset,
        schema=schema,
        embedding_enabled=has_embedding,
        embedding_dim=resolved_embedding_dim,
        content_type=content_type,
        content_frozen=immutable,
        # Flattening needs a typed content model: NodeConfig's validation
        # rule rejects flatten_content=True without content_type, so the
        # True default must not apply to content-less nodes.
        flatten_content=flatten_content and content is not None,
        content_hashing=content_hashing,
        integrity_hashing=integrity_hashing,
        soft_delete=soft_delete,
        track_deleted_by=track_deleted_by,
        track_is_active=track_is_active,
        versioning=versioning,
        track_updated_at=track_updated_at,
        track_updated_by=track_updated_by,
        **config_kwargs,
    )

    # 4. Compose Node subclass
    op = Operable(all_specs, adapter="pydantic")
    node_cls: type[Node] = op.compose_structure(
        name,
        include=set(include),
        base_type=Node,
        doc=doc,
    )
    node_cls.node_config = node_config  # type: ignore[attr-defined]

    # 5. Register the class. node_config is attached only after the class
    # has been composed, so the registration hook on Node presumably ran
    # against the inherited default config; register explicitly here.
    if node_config.polymorphic:
        registry_key = (
            node_cls.class_name(full=True)
            if node_config.is_sentinel_field("registry_key")
            else node_config.registry_key
        )
        NODE_REGISTRY[registry_key] = node_cls
    if node_config.is_persisted:
        _register_persistable(node_config.table_name, node_cls)

    return node_cls
795
+
796
+
797
+ # --- DDL Generation ---
798
+
799
+
800
+ def _extract_base_type(annotation: Any) -> Any:
801
+ """Extract non-None type from Union (e.g., T | None -> T)."""
802
+ import types
803
+ from typing import get_args, get_origin
804
+
805
+ if annotation is None:
806
+ return None
807
+
808
+ if isinstance(annotation, types.UnionType) or get_origin(annotation) is type(int | str):
809
+ args = get_args(annotation)
810
+ non_none_args = [a for a in args if a is not type(None)]
811
+ if non_none_args:
812
+ return non_none_args[0]
813
+
814
+ return annotation
815
+
816
+
817
def generate_ddl(
    node_cls: type[Node],
    *,
    include_audit_columns: bool = True,
) -> str:
    """Build the CREATE TABLE DDL for a persistable Node subclass.

    Content fields are flattened into individual columns when the node is
    configured for it and its content type is a BaseModel subclass; otherwise
    a single ``content`` column is used. Audit columns enabled on the node's
    config are added, and PostgreSQL DDL with pgvector support for embeddings
    is produced through the ``sql`` Operable adapter.

    Args:
        node_cls: Persistable Node subclass (must have table_name)
        include_audit_columns: Include audit columns from NodeConfig

    Returns:
        CREATE TABLE IF NOT EXISTS statement

    Raises:
        ValueError: If node_cls has no table_name configured
    """
    from kronos.specs.catalog import AuditSpecs, ContentSpecs
    from kronos.specs.operable import Operable

    config = node_cls.get_config()
    if not config.is_persisted:
        raise ValueError(f"{node_cls.__name__} is not persistable (no table_name configured)")

    # 1. Resolve the content type: an explicit config value wins, otherwise
    #    fall back to the class's resolved annotation with ``None`` stripped.
    if config.is_sentinel_field("content_type"):
        content_type = _extract_base_type(node_cls._resolved_content_type)
    else:
        content_type = config.content_type

    embedding_dim = config.embedding_dim if config.embedding_enabled else Unset
    content_specs = ContentSpecs.get_specs(dim=embedding_dim)
    all_specs = content_specs + AuditSpecs.get_specs(use_uuid=True)

    # Flattening only applies to BaseModel content; ``flattened_model`` stays
    # None whenever the generic ``content`` column should be kept.
    flattened_model = None
    if config.flatten_content and content_type is not None:
        from kronos.specs.adapters.pydantic_adapter import PydanticSpecAdapter

        if isinstance(content_type, type) and issubclass(content_type, BaseModel):
            flattened_model = content_type
            all_specs.extend(PydanticSpecAdapter.extract_specs(flattened_model))

    # 2. Collect the column names to emit.
    include: set[str] = {"id", "created_at", "metadata"}

    if config.embedding_enabled:
        include.add("embedding")

    if flattened_model is None:
        include.add("content")
    else:
        # One column per field of the content model instead of ``content``.
        include.update(flattened_model.model_fields.keys())

    if include_audit_columns:
        # (config flag, columns added when the flag is truthy)
        audit_columns = (
            (config.track_updated_at, ("updated_at",)),
            (config.track_updated_by, ("updated_by",)),
            (config.track_is_active, ("is_active",)),
            (config.soft_delete, ("is_deleted", "deleted_at")),
            (config.track_deleted_by, ("deleted_by",)),
            (config.versioning, ("version",)),
            (config.content_hashing, ("content_hash",)),
            (config.integrity_hashing, ("integrity_hash",)),
        )
        for enabled, columns in audit_columns:
            if enabled:
                include.update(columns)

    # 3. Compose DDL via Operable.
    sql_operable = Operable(all_specs, adapter="sql")
    return sql_operable.compose_structure(
        config.table_name,
        include=include,
        schema=config.schema,
        primary_key="id",
    )
911
+
912
+
913
def generate_all_ddl(*, schema: str | None = None) -> str:
    """Generate DDL for every registered persistable Node subclass.

    Walks PERSISTABLE_NODE_REGISTRY in its iteration order and produces one
    CREATE TABLE statement per class via ``generate_ddl``.

    Args:
        schema: Only include nodes whose config schema equals this value
            (None = all schemas)

    Returns:
        The DDL statements joined by blank lines ("" when nothing matches)
    """
    return "\n\n".join(
        generate_ddl(node_cls)
        for node_cls in PERSISTABLE_NODE_REGISTRY.values()
        if schema is None or node_cls.get_config().schema == schema
    )
937
+
938
+
939
def get_fk_dependencies(node_cls: type[Node]) -> set[str]:
    """Get table names that this node depends on via foreign keys.

    Used for topological sorting in migrations - ensures tables are
    created in dependency order.

    Args:
        node_cls: Node subclass to analyze

    Returns:
        Set of table names this node references via FK[Model]. Empty when the
        content type is missing or exposes no ``model_fields``.
    """
    config = node_cls.get_config()
    if config.is_sentinel_field("content_type"):
        # Unwrap ``Model | None`` the same way generate_ddl does; a union
        # object has no ``model_fields``, so without this an optional content
        # annotation would silently report no dependencies.
        content_type = _extract_base_type(node_cls._resolved_content_type)
    else:
        content_type = config.content_type

    if content_type is None or not hasattr(content_type, "model_fields"):
        return set()

    deps: set[str] = set()
    for field_info in content_type.model_fields.values():
        fk = extract_kron_db_meta(field_info, metas="FK")
        # A sentinel result means the field carries no FK metadata.
        if not_sentinel(fk):
            deps.add(fk.table_name)
    return deps
968
+
969
+
970
# Public names exported by this module.
__all__ = (
    # Class registries
    "NODE_REGISTRY", "PERSISTABLE_NODE_REGISTRY",
    # Node model and its configuration
    "Node", "NodeConfig",
    # Node factory and DDL helpers
    "create_node", "generate_ddl", "generate_all_ddl", "get_fk_dependencies",
)