katalyst-engine 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- katalyst_engine/__init__.py +6 -0
- katalyst_engine/bundle/__init__.py +30 -0
- katalyst_engine/bundle/discovery.py +158 -0
- katalyst_engine/bundle/loader.py +134 -0
- katalyst_engine/bundle/protocol.py +209 -0
- katalyst_engine/core/__init__.py +62 -0
- katalyst_engine/core/compatibility.py +58 -0
- katalyst_engine/core/compositional.py +103 -0
- katalyst_engine/core/definitive.py +195 -0
- katalyst_engine/core/evolvable.py +89 -0
- katalyst_engine/core/identity.py +95 -0
- katalyst_engine/core/lifecycle.py +62 -0
- katalyst_engine/core/relation.py +151 -0
- katalyst_engine/core/version.py +203 -0
- katalyst_engine/discovery/__init__.py +20 -0
- katalyst_engine/discovery/declaration.py +74 -0
- katalyst_engine/discovery/dispatcher.py +83 -0
- katalyst_engine/discovery/protocol.py +69 -0
- katalyst_engine/events/__init__.py +10 -0
- katalyst_engine/events/bus.py +102 -0
- katalyst_engine/events/event.py +82 -0
- katalyst_engine/extensions/__init__.py +32 -0
- katalyst_engine/extensions/capability.py +45 -0
- katalyst_engine/extensions/discovery.py +85 -0
- katalyst_engine/extensions/effector.py +54 -0
- katalyst_engine/extensions/provider.py +33 -0
- katalyst_engine/extensions/registry.py +77 -0
- katalyst_engine/extensions/trigger.py +64 -0
- katalyst_engine/model/__init__.py +25 -0
- katalyst_engine/model/manager.py +85 -0
- katalyst_engine/model/materializer.py +78 -0
- katalyst_engine/model/node.py +49 -0
- katalyst_engine/model/query.py +186 -0
- katalyst_engine/model/store.py +119 -0
- katalyst_engine/py.typed +0 -0
- katalyst_engine/replication/__init__.py +30 -0
- katalyst_engine/replication/engine.py +104 -0
- katalyst_engine/replication/job.py +88 -0
- katalyst_engine/replication/transform.py +111 -0
- katalyst_engine/resolution/__init__.py +32 -0
- katalyst_engine/resolution/conflict.py +91 -0
- katalyst_engine/resolution/engine.py +131 -0
- katalyst_engine/resolution/strategies.py +122 -0
- katalyst_engine/schema/__init__.py +35 -0
- katalyst_engine/schema/definition.py +281 -0
- katalyst_engine/schema/manager.py +95 -0
- katalyst_engine/schema/registry.py +367 -0
- katalyst_engine/schema/versioning.py +115 -0
- katalyst_engine/snapshot/__init__.py +18 -0
- katalyst_engine/snapshot/diff.py +94 -0
- katalyst_engine/snapshot/snapshot.py +111 -0
- katalyst_engine/source/__init__.py +26 -0
- katalyst_engine/source/manifest.py +45 -0
- katalyst_engine/source/registry.py +122 -0
- katalyst_engine/source/source.py +92 -0
- katalyst_engine/toolkit/__init__.py +22 -0
- katalyst_engine/toolkit/file_ops.py +194 -0
- katalyst_engine/toolkit/rendering.py +58 -0
- katalyst_engine-2.0.0.dist-info/METADATA +50 -0
- katalyst_engine-2.0.0.dist-info/RECORD +61 -0
- katalyst_engine-2.0.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Query engine — filter, sort, and paginate over a ModelStore.
|
|
2
|
+
|
|
3
|
+
QueryFilter specifies criteria for selecting nodes. QueryEngine
|
|
4
|
+
applies filters and provides sorted, paginated results.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from collections.abc import Callable, Iterable
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from fnmatch import fnmatch
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel
|
|
14
|
+
|
|
15
|
+
from katalyst_engine.model.node import Node
|
|
16
|
+
from katalyst_engine.model.store import ModelStore
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SortField(str, Enum):
    """Node identity attributes that query results can be ordered by.

    The enum mixes in ``str``, so each member compares equal to its
    lowercase string value (for example ``SortField.NAME == "name"``).
    """

    # Order by the fully qualified name.
    FQN = "fqn"
    # Order by the local name.
    NAME = "name"
    # Order by the node kind.
    KIND = "kind"
    # Order by the namespace.
    NAMESPACE = "namespace"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SortOrder(str, Enum):
    """Direction in which sorted query results are returned.

    The enum mixes in ``str``, so members compare equal to their
    string values (``"asc"`` / ``"desc"``).
    """

    # Ascending order.
    ASC = "asc"
    # Descending order.
    DESC = "desc"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class QueryFilter(BaseModel, frozen=True):
    """Criteria for selecting nodes from a ModelStore.

    All non-None criteria are ANDed. Glob patterns use fnmatch syntax and
    are matched case-sensitively on every platform.
    Label matchers require exact matches on all specified key-value pairs.
    The ``custom`` callable allows domain-specific predicates without
    requiring the engine to know about domain types (e.g., SchemaRegistry).
    """

    kind: str | None = None
    """Exact kind match. None means any kind."""

    namespace: str | None = None
    """Exact namespace match. None means any namespace."""

    name_pattern: str | None = None
    """Glob pattern for name matching. None means any name."""

    label_matchers: dict[str, str] = {}
    """Label key-value pairs that must all match exactly."""

    custom: Callable[[Node], bool] | None = None
    """Arbitrary predicate for domain-specific filtering.

    When provided, a node must satisfy this callable in addition to all
    other criteria. This allows taxonomy (or any consumer) to inject
    schema-aware predicates without the engine importing domain types.
    """

    # Needed so pydantic accepts the plain callable stored in ``custom``.
    model_config = {"arbitrary_types_allowed": True}

    def matches(self, node: Node) -> bool:
        """Check whether a node satisfies every configured criterion.

        Args:
            node: The node to test. Its ``identity`` (kind, namespace, name)
                and ``labels`` mapping are inspected.

        Returns:
            True when the node passes all non-None criteria, every label
            matcher, and the ``custom`` predicate (if any); False otherwise.
            A filter with no criteria matches every node.
        """
        # fnmatch() runs both arguments through os.path.normcase, which makes
        # matching case-insensitive on Windows only. Node names are not file
        # paths, so use the platform-independent, case-sensitive variant.
        from fnmatch import fnmatchcase

        if self.kind is not None and node.identity.kind != self.kind:
            return False
        if self.namespace is not None and node.identity.namespace != self.namespace:
            return False
        if self.name_pattern is not None and not fnmatchcase(
            node.identity.name, self.name_pattern
        ):
            return False
        # A missing label yields None from .get(), which never equals the
        # required string value, so absent labels fail the match.
        if any(node.labels.get(key) != value for key, value in self.label_matchers.items()):
            return False
        # The custom predicate runs last, only for nodes that passed
        # every built-in check.
        return self.custom is None or bool(self.custom(node))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class QueryResult(BaseModel, frozen=True):
    """One page of query output together with its pagination parameters.

    Instances are frozen (immutable) pydantic models.
    """

    # The nodes on this page, in result order.
    nodes: tuple[Node, ...] = ()

    # How many nodes matched in total, counted before pagination.
    total: int = 0

    # Position of the first returned node within the full match list.
    offset: int = 0

    # Upper bound on the page size; None means the page was not capped.
    limit: int | None = None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class QueryEngine:
    """Filter, sort, and paginate over nodes.

    Accepts either a ``ModelStore`` or any iterable of ``Node`` objects,
    enabling consumers to provide pre-filtered collections or domain-specific
    stores without wrapping them in a ``ModelStore``.

    Stores and re-iterable containers are held by reference, so changes made
    to them between queries are visible. One-shot iterators (e.g. generators)
    are snapshotted at construction so that every query sees the same nodes.

    Usage:
        engine = QueryEngine(store)
        result = engine.query(
            QueryFilter(kind="layer"),
            sort_by=SortField.NAME,
            offset=0, limit=10,
        )

        # Or with raw nodes:
        engine = QueryEngine(my_node_list)
        result = engine.query(QueryFilter(custom=my_predicate))
    """

    def __init__(self, source: ModelStore | Iterable[Node]) -> None:
        """Bind the engine to a node source.

        Args:
            source: A ``ModelStore`` or any iterable of nodes.
        """
        # An iterator is its own iterator; the first query would exhaust it
        # and every later query would silently return nothing. Materialize it
        # once. Containers are left untouched so they stay live.
        if not isinstance(source, ModelStore) and iter(source) is source:
            source = tuple(source)
        self._source = source

    def _all_nodes(self) -> list[Node]:
        """Return a fresh list of every node currently in the source."""
        if isinstance(self._source, ModelStore):
            return list(self._source.all_nodes())
        return list(self._source)

    def query(
        self,
        filter: QueryFilter | None = None,
        sort_by: SortField = SortField.FQN,
        sort_order: SortOrder = SortOrder.ASC,
        offset: int = 0,
        limit: int | None = None,
    ) -> QueryResult:
        """Execute a query against the source.

        Args:
            filter: Selection criteria. None returns all nodes.
            sort_by: Field to sort by.
            sort_order: Ascending or descending.
            offset: Number of results to skip. Must be >= 0.
            limit: Maximum number of results to return. None means no
                limit; otherwise must be >= 0.

        Returns:
            A QueryResult with matching, sorted, paginated nodes. ``total``
            is the number of matches before pagination.

        Raises:
            ValueError: If ``offset`` is negative, or ``limit`` is negative.
        """
        # Negative values would otherwise be interpreted by slicing as
        # "count from the end" (limit) or be silently ignored (offset).
        if offset < 0:
            raise ValueError(f"offset must be >= 0, got {offset}")
        if limit is not None and limit < 0:
            raise ValueError(f"limit must be >= 0 or None, got {limit}")

        candidates = self._all_nodes()
        if filter is not None:
            candidates = [n for n in candidates if filter.matches(n)]

        candidates = self._sort(candidates, sort_by, sort_order)
        total = len(candidates)

        page = candidates[offset:]
        if limit is not None:
            page = page[:limit]

        return QueryResult(
            nodes=tuple(page),
            total=total,
            offset=offset,
            limit=limit,
        )

    def _sort(
        self,
        nodes: list[Node],
        sort_by: SortField,
        sort_order: SortOrder,
    ) -> list[Node]:
        """Return a new list of nodes ordered by the given identity field.

        Args:
            nodes: Nodes to order; the input list is not modified.
            sort_by: Identity field used as the sort key. Anything other than
                NAME, KIND or NAMESPACE sorts by FQN.
            sort_order: ``SortOrder.DESC`` reverses the ordering.

        Returns:
            The sorted list.
        """
        # Resolve the attribute once instead of re-dispatching for every node.
        # Equality (not hashing) is used so plain strings such as "name"
        # keep working the same way as enum members.
        if sort_by == SortField.NAME:
            attr = "name"
        elif sort_by == SortField.KIND:
            attr = "kind"
        elif sort_by == SortField.NAMESPACE:
            attr = "namespace"
        else:
            attr = "fqn"

        return sorted(
            nodes,
            key=lambda node: getattr(node.identity, attr),
            reverse=(sort_order == SortOrder.DESC),
        )
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Model store — in-memory indexed collection of Nodes.
|
|
2
|
+
|
|
3
|
+
Provides indexed access by name, kind, namespace, and FQN.
|
|
4
|
+
Supports add, remove, and query operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from katalyst_engine.core.identity import Identity
|
|
10
|
+
from katalyst_engine.model.node import Node
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ModelStore:
    """In-memory indexed collection of Nodes.

    Maintains multiple indexes for fast lookup:
    - by FQN (primary key)
    - by kind
    - by namespace
    - by name

    The secondary indexes map a key to a list of nodes in insertion order.
    Buckets that become empty are dropped, so the indexes never hold keys
    for nodes that are no longer stored.

    This is a mutable container — nodes can be added and removed.
    """

    def __init__(self) -> None:
        """Create an empty store."""
        self._by_fqn: dict[str, Node] = {}
        self._by_kind: dict[str, list[Node]] = {}
        self._by_namespace: dict[str, list[Node]] = {}
        self._by_name: dict[str, list[Node]] = {}

    def add(self, node: Node) -> None:
        """Add a node to the store, replacing any existing node with the same FQN."""
        fqn = node.identity.fqn
        previous = self._by_fqn.get(fqn)
        if previous is not None:
            # Drop the old entry first so the secondary indexes never hold
            # two nodes for one FQN.
            self._remove_from_indexes(previous)
        self._by_fqn[fqn] = node
        self._add_to_indexes(node)

    def remove(self, identity: Identity) -> bool:
        """Remove a node by identity. Returns True if it existed."""
        node = self._by_fqn.pop(identity.fqn, None)
        if node is None:
            return False
        self._remove_from_indexes(node)
        return True

    def get(self, identity: Identity) -> Node | None:
        """Look up a node by identity; returns None when absent."""
        return self._by_fqn.get(identity.fqn)

    def get_by_fqn(self, fqn: str) -> Node | None:
        """Look up a node by FQN string; returns None when absent."""
        return self._by_fqn.get(fqn)

    def by_kind(self, kind: str) -> list[Node]:
        """Return a new list of all nodes of a given kind."""
        return list(self._by_kind.get(kind, []))

    def by_namespace(self, namespace: str) -> list[Node]:
        """Return a new list of all nodes in a given namespace."""
        return list(self._by_namespace.get(namespace, []))

    def by_name(self, name: str) -> list[Node]:
        """Return a new list of all nodes with a given local name."""
        return list(self._by_name.get(name, []))

    def all_nodes(self) -> list[Node]:
        """Return all nodes, ordered by FQN."""
        return sorted(self._by_fqn.values(), key=lambda n: n.identity.fqn)

    @property
    def count(self) -> int:
        """Number of nodes in the store."""
        return len(self._by_fqn)

    def clear(self) -> None:
        """Remove all nodes."""
        self._by_fqn.clear()
        self._by_kind.clear()
        self._by_namespace.clear()
        self._by_name.clear()

    def _add_to_indexes(self, node: Node) -> None:
        """Append a node to its kind, namespace and name buckets."""
        identity = node.identity
        self._by_kind.setdefault(identity.kind, []).append(node)
        self._by_namespace.setdefault(identity.namespace, []).append(node)
        self._by_name.setdefault(identity.name, []).append(node)

    def _remove_from_indexes(self, node: Node) -> None:
        """Remove a node from the secondary indexes, pruning empty buckets."""
        identity = node.identity
        fqn = identity.fqn
        targets = (
            (self._by_kind, identity.kind),
            (self._by_namespace, identity.namespace),
            (self._by_name, identity.name),
        )
        for index, key in targets:
            bucket = index.get(key)
            if bucket is None:
                continue
            # Entries are matched by FQN rather than object identity, and the
            # list is rewritten in place.
            bucket[:] = [n for n in bucket if n.identity.fqn != fqn]
            if not bucket:
                # Without this, every kind/namespace/name ever seen would
                # keep an empty list alive for the lifetime of the store.
                del index[key]
|
katalyst_engine/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Replication — jobs, transforms, and the replication engine.
|
|
2
|
+
|
|
3
|
+
.. note:: FUTURE: Wire into taxonomy when a third sync target arrives or the
|
|
4
|
+
sync flow is refactored. The taxonomy's neo4j and postgres adapters
|
|
5
|
+
hand-roll what ReplicationEngine provides. Making ReplicationEngine
|
|
6
|
+
protocol-driven (accept ReplicationTarget) would allow both adapters
|
|
7
|
+
to share pipeline logic.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from katalyst_engine.replication.engine import ReplicationEngine
|
|
11
|
+
from katalyst_engine.replication.job import (
|
|
12
|
+
ReplicationJob,
|
|
13
|
+
ReplicationMode,
|
|
14
|
+
ReplicationResult,
|
|
15
|
+
)
|
|
16
|
+
from katalyst_engine.replication.transform import (
|
|
17
|
+
TransformPipeline,
|
|
18
|
+
TransformRunner,
|
|
19
|
+
TransformStep,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"ReplicationEngine",
|
|
24
|
+
"ReplicationJob",
|
|
25
|
+
"ReplicationMode",
|
|
26
|
+
"ReplicationResult",
|
|
27
|
+
"TransformPipeline",
|
|
28
|
+
"TransformRunner",
|
|
29
|
+
"TransformStep",
|
|
30
|
+
]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Replication engine — executes replication jobs.
|
|
2
|
+
|
|
3
|
+
Coordinates the source registry, model store, and transform pipeline
|
|
4
|
+
to replicate nodes from one source to another.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from katalyst_engine.model.store import ModelStore
|
|
10
|
+
from katalyst_engine.replication.job import (
|
|
11
|
+
ReplicationJob,
|
|
12
|
+
ReplicationMode,
|
|
13
|
+
ReplicationResult,
|
|
14
|
+
)
|
|
15
|
+
from katalyst_engine.replication.transform import TransformPipeline, TransformRunner
|
|
16
|
+
from katalyst_engine.source.registry import SourceRegistry
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ReplicationEngine:
    """Executes replication jobs using source registry and model store.

    The engine reads nodes from the source model store, applies
    optional transforms, and writes them to the target store.

    Usage:
        engine = ReplicationEngine(source_registry, source_store, target_store)
        engine.set_transform_runner(runner)
        result = engine.execute(job)
    """

    def __init__(
        self,
        source_registry: SourceRegistry,
        source_store: ModelStore,
        target_store: ModelStore,
    ) -> None:
        """Bind the engine to its registry and stores.

        Args:
            source_registry: Registry of known sources. It is stored on the
                engine; ``execute`` does not consult it.
            source_store: Store that nodes are read from.
            target_store: Store that nodes are written to.
        """
        self._source_registry = source_registry
        self._source_store = source_store
        self._target_store = target_store
        self._transform_runner: TransformRunner | None = None
        self._pipeline: TransformPipeline | None = None

    def set_transform_runner(
        self, runner: TransformRunner, pipeline: TransformPipeline | None = None
    ) -> None:
        """Configure the transform runner and optional pipeline.

        Transforms are only applied when both a runner and a pipeline are
        set; calling this without a pipeline clears any earlier pipeline.
        """
        self._transform_runner = runner
        self._pipeline = pipeline

    def execute(self, job: ReplicationJob) -> ReplicationResult:
        """Execute a replication job.

        Reads nodes from the source store, applies the job's filter and the
        configured transforms, then writes to the target store according
        to the job's mode. In ``FULL`` mode the target is cleared first.

        When ``job.dry_run`` is set the target store is never modified; the
        returned counts describe what a real run would have created,
        updated, or skipped.

        Args:
            job: The replication job configuration.

        Returns:
            A ReplicationResult describing what happened (or, for a dry
            run, what would happen). Failures on individual nodes are
            collected in ``errors`` rather than raised.
        """
        nodes = [n for n in self._source_store.all_nodes() if job.filter.matches(n)]

        runner, pipeline = self._transform_runner, self._pipeline
        if runner is not None and pipeline is not None:
            nodes = [runner.run(pipeline, n) for n in nodes]

        dry_run = job.dry_run
        replace_all = job.mode == ReplicationMode.FULL
        if replace_all and not dry_run:
            self._target_store.clear()

        # A dry run cannot observe its own writes through the target store,
        # so it tracks them here (keyed by FQN). That keeps repeated
        # identities within one batch classified the same way a real run
        # would classify them.
        planned: dict[str, object] = {}

        created = 0
        updated = 0
        skipped = 0
        errors: list[str] = []

        for node in nodes:
            # Deliberately broad: one bad node must not abort the batch;
            # the failure is reported through ``errors`` instead.
            try:
                if dry_run:
                    fqn = node.identity.fqn
                    existing = planned.get(fqn)
                    # In FULL mode the target would have been emptied, so
                    # its current contents are irrelevant to the forecast.
                    if existing is None and not replace_all:
                        existing = self._target_store.get(node.identity)
                else:
                    existing = self._target_store.get(node.identity)

                if existing is not None and existing == node:
                    skipped += 1
                    continue

                if dry_run:
                    planned[fqn] = node
                else:
                    self._target_store.add(node)

                if existing is None:
                    created += 1
                else:
                    updated += 1
            except Exception as exc:
                errors.append(f"Failed to replicate {node.identity.fqn}: {exc}")

        return ReplicationResult(
            job=job,
            created_count=created,
            updated_count=updated,
            skipped_count=skipped,
            errors=tuple(errors),
        )
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Replication job — configuration and results for data replication.
|
|
2
|
+
|
|
3
|
+
A ReplicationJob describes what to replicate, from where, to where,
|
|
4
|
+
and how. A ReplicationResult captures the outcome.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from enum import Enum
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
|
|
13
|
+
from katalyst_engine.core.identity import Identity
|
|
14
|
+
from katalyst_engine.model.query import QueryFilter
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ReplicationMode(str, Enum):
    """Strategy for dealing with data that already exists at the target.

    The enum mixes in ``str``, so members compare equal to their
    lowercase string values.
    """

    # Replace all target data with source data.
    FULL = "full"

    # Only replicate changed items since last sync.
    INCREMENTAL = "incremental"

    # Merge source data into target, keeping non-conflicting target data.
    MERGE = "merge"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ReplicationJob(BaseModel, frozen=True):
    """Immutable description of a single replication operation.

    Names the source and target, the filter that selects nodes, the
    replication mode, and whether the run is a dry run.
    """

    # Identity of the source to replicate from.
    source_identity: Identity

    # Identity of the target to replicate to.
    target_identity: Identity

    # Filter selecting the subset of nodes to replicate. The default
    # filter has no criteria set.
    filter: QueryFilter = QueryFilter()

    # How existing target data is handled; incremental unless overridden.
    mode: ReplicationMode = ReplicationMode.INCREMENTAL

    # Free-form, human-readable description of the job.
    description: str = ""

    # If True, compute what would change without actually replicating.
    dry_run: bool = False
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ReplicationResult(BaseModel, frozen=True):
    """Immutable summary of what a replication run did.

    Holds per-category node counts plus the error messages for nodes
    that could not be replicated.
    """

    # The job that produced this result.
    job: ReplicationJob

    # Number of nodes created at the target.
    created_count: int = 0

    # Number of nodes updated at the target.
    updated_count: int = 0

    # Number of nodes deleted at the target.
    deleted_count: int = 0

    # Number of nodes skipped because they were already up to date.
    skipped_count: int = 0

    # One message per item that failed.
    errors: tuple[str, ...] = ()

    @property
    def total_processed(self) -> int:
        """Sum of the created, updated, deleted and skipped counts."""
        counts = (
            self.created_count,
            self.updated_count,
            self.deleted_count,
            self.skipped_count,
        )
        return sum(counts)

    @property
    def success(self) -> bool:
        """Whether the run finished without recording any error."""
        return not self.errors
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Transform pipeline — ordered transformations applied during replication.
|
|
2
|
+
|
|
3
|
+
TransformSteps are individual transformations; a TransformPipeline
|
|
4
|
+
chains them together and applies them to nodes during replication.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel
|
|
13
|
+
|
|
14
|
+
from katalyst_engine.model.node import Node
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TransformStep(ABC):
    """Interface for one transformation applied to a node during replication.

    Concrete steps rewrite a node on its way from source to target:
    spec fields, labels, annotations, or even the node's identity.
    Subclasses must implement both ``transform`` and ``name``.
    """

    @abstractmethod
    def transform(self, node: Node, context: dict[str, Any]) -> Node:
        """Apply this step to a node.

        Args:
            node: The node to transform.
            context: Additional context (e.g. source/target metadata).

        Returns:
            A new Node with the transformation applied.
        """

    @abstractmethod
    def name(self) -> str:
        """Return the human-readable name of this step."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class TransformPipeline(BaseModel, frozen=True):
    """Frozen configuration listing which transform steps run, in order.

    The pipeline stores only step *names*. TransformStep instances are
    not serializable (they're ABCs), so the name → step mapping is kept
    by whatever executes the pipeline, which looks each step up by name.
    """

    # Ordered names of the transform steps to apply.
    step_names: tuple[str, ...] = ()

    # Human-readable description of what this pipeline does.
    description: str = ""
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class TransformRunner:
    """Executes a TransformPipeline against nodes.

    Maintains a registry of named TransformSteps and applies them
    in pipeline order to each node.

    Usage:
        runner = TransformRunner()
        runner.register("strip_labels", MyStripLabelsStep())
        runner.register("rename_kind", MyRenameKindStep())
        transformed = runner.run(pipeline, node, context)
    """

    def __init__(self) -> None:
        """Create a runner with no registered steps."""
        self._steps: dict[str, TransformStep] = {}

    def register(self, name: str, step: TransformStep) -> None:
        """Register a named transform step.

        Registering under a name that is already in use replaces the
        earlier step.
        """
        self._steps[name] = step

    def run(
        self,
        pipeline: TransformPipeline,
        node: Node,
        context: dict[str, Any] | None = None,
    ) -> Node:
        """Apply all steps in a pipeline to a node.

        Steps are applied in order. Each step receives the output
        of the previous step. Unknown step names are skipped.

        Args:
            pipeline: The pipeline configuration.
            node: The input node.
            context: Optional context dict passed to each step. The same
                dict object is handed to every step, so anything a step
                writes into it is visible to later steps and to the caller.
                When omitted, a fresh empty dict is used.

        Returns:
            The transformed node (the input node itself when no step ran).
        """
        # Test for None explicitly: ``context or {}`` would replace a
        # caller-supplied *empty* dict with a new one, hiding whatever the
        # steps record in it from the caller.
        ctx = {} if context is None else context
        result = node
        for step_name in pipeline.step_names:
            step = self._steps.get(step_name)
            if step is not None:
                result = step.transform(result, ctx)
        return result

    @property
    def step_count(self) -> int:
        """Number of registered transform steps."""
        return len(self._steps)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Resolution — detecting and resolving conflicts between multi-source declarations.
|
|
2
|
+
|
|
3
|
+
.. note:: FUTURE: Wire into taxonomy when multi-source node conflicts need handling.
|
|
4
|
+
When two sources declare the same node identity, the ResolutionEngine
|
|
5
|
+
picks a winner using pluggable strategies. Pairs with source/ and
|
|
6
|
+
discovery/ as a coherent pipeline.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from katalyst_engine.resolution.conflict import (
|
|
10
|
+
Conflict,
|
|
11
|
+
ConflictReport,
|
|
12
|
+
ConflictSeverity,
|
|
13
|
+
)
|
|
14
|
+
from katalyst_engine.resolution.engine import ResolutionEngine, ResolutionResult
|
|
15
|
+
from katalyst_engine.resolution.strategies import (
|
|
16
|
+
CanonicalSourceStrategy,
|
|
17
|
+
LatestVersionStrategy,
|
|
18
|
+
MergeStrategy,
|
|
19
|
+
ResolutionStrategy,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"CanonicalSourceStrategy",
|
|
24
|
+
"Conflict",
|
|
25
|
+
"ConflictReport",
|
|
26
|
+
"ConflictSeverity",
|
|
27
|
+
"LatestVersionStrategy",
|
|
28
|
+
"MergeStrategy",
|
|
29
|
+
"ResolutionEngine",
|
|
30
|
+
"ResolutionResult",
|
|
31
|
+
"ResolutionStrategy",
|
|
32
|
+
]
|