crochet-migration 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crochet/__init__.py +3 -0
- crochet/cli.py +327 -0
- crochet/config.py +116 -0
- crochet/errors.py +75 -0
- crochet/ingest/__init__.py +5 -0
- crochet/ingest/batch.py +61 -0
- crochet/ir/__init__.py +23 -0
- crochet/ir/diff.py +199 -0
- crochet/ir/hash.py +36 -0
- crochet/ir/parser.py +251 -0
- crochet/ir/schema.py +196 -0
- crochet/ledger/__init__.py +5 -0
- crochet/ledger/sqlite.py +282 -0
- crochet/migrations/__init__.py +6 -0
- crochet/migrations/engine.py +279 -0
- crochet/migrations/operations.py +267 -0
- crochet/migrations/template.py +105 -0
- crochet/scaffold/__init__.py +6 -0
- crochet/scaffold/node.py +48 -0
- crochet/scaffold/relationship.py +52 -0
- crochet/verify.py +141 -0
- crochet_migration-0.1.0.dist-info/METADATA +278 -0
- crochet_migration-0.1.0.dist-info/RECORD +26 -0
- crochet_migration-0.1.0.dist-info/WHEEL +4 -0
- crochet_migration-0.1.0.dist-info/entry_points.txt +2 -0
- crochet_migration-0.1.0.dist-info/licenses/LICENSE +21 -0
crochet/ir/diff.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""Diff two schema snapshots to produce migration intent."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from crochet.ir.schema import NodeIR, PropertyIR, RelationshipIR, SchemaSnapshot
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class PropertyChange:
    """Record of how one named property differs between two schema versions.

    ``old`` holds the property as it was and ``new`` as it is now; either may
    be ``None`` depending on ``kind``.
    """

    kind: str  # one of "added", "removed", "modified"
    property_name: str
    old: "PropertyIR | None" = None
    new: "PropertyIR | None" = None

    @property
    def description(self) -> str:
        """Return a one-line, human-readable rendering of this change.

        Added properties show their new type, removed ones only their name,
        and modified ones list the differing type / ``required`` /
        ``unique_index`` / ``index`` attributes.  When none of those four
        differ (or a side is missing) the detail is the word ``modified``.
        """
        name = self.property_name
        if self.kind == "added":
            return f" + property '{name}' ({self.new.property_type})"  # type: ignore[union-attr]
        if self.kind == "removed":
            return f" - property '{name}'"

        before, after = self.old, self.new
        notes: list[str] = []
        if before and after:
            if before.property_type != after.property_type:
                notes.append(f"type {before.property_type} -> {after.property_type}")
            # The three boolean flags are all reported as "<flag>=<new value>".
            for flag in ("required", "unique_index", "index"):
                if getattr(before, flag) != getattr(after, flag):
                    notes.append(f"{flag}={getattr(after, flag)}")
        detail = ", ".join(notes) if notes else "modified"
        return f" ~ property '{name}' ({detail})"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class NodeChange:
    """Record of how one node model (identified by ``kgid``) changed.

    ``property_changes`` carries the per-property details for a modified
    node and ``label_renamed`` flags a change of the node's label.
    """

    kind: str  # one of "added", "removed", "modified"
    kgid: str
    old: "NodeIR | None" = None
    new: "NodeIR | None" = None
    property_changes: list[PropertyChange] = field(default_factory=list)
    label_renamed: bool = False

    @property
    def description(self) -> str:
        """Return a human-readable rendering of this change.

        Added/removed nodes render as a single line.  A modified node
        renders as a header line followed by an optional rename line and one
        line per property change, joined with newlines.
        """
        if self.kind == "added":
            return f"+ Node '{self.new.label}' (kgid={self.kgid})"  # type: ignore[union-attr]
        if self.kind == "removed":
            return f"- Node '{self.old.label}' (kgid={self.kgid})"  # type: ignore[union-attr]

        rename_lines = (
            [f" renamed '{self.old.label}' -> '{self.new.label}'"]  # type: ignore[union-attr]
            if self.label_renamed
            else []
        )
        property_lines = [change.description for change in self.property_changes]
        return "\n".join([f"~ Node kgid={self.kgid}", *rename_lines, *property_lines])
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class RelationshipChange:
    """Record of how one relationship model (identified by ``kgid``) changed."""

    kind: str  # compared against "added" and "removed"; anything else renders as modified
    kgid: str
    old: "RelationshipIR | None" = None
    new: "RelationshipIR | None" = None
    property_changes: list[PropertyChange] = field(default_factory=list)

    @property
    def description(self) -> str:
        """Return a human-readable rendering of this change.

        Added/removed relationships render as a single line naming the
        relationship type.  Any other kind renders as a header line followed
        by one line per property change, joined with newlines.
        """
        if self.kind == "added":
            return f"+ Relationship '{self.new.rel_type}' (kgid={self.kgid})"  # type: ignore[union-attr]
        if self.kind == "removed":
            return f"- Relationship '{self.old.rel_type}' (kgid={self.kgid})"  # type: ignore[union-attr]

        property_lines = [change.description for change in self.property_changes]
        return "\n".join([f"~ Relationship kgid={self.kgid}", *property_lines])
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass
class SchemaDiff:
    """Container for every node and relationship change between two snapshots."""

    node_changes: list[NodeChange] = field(default_factory=list)
    relationship_changes: list[RelationshipChange] = field(default_factory=list)

    @property
    def has_changes(self) -> bool:
        """Return ``True`` when at least one node or relationship change exists."""
        return bool(self.node_changes) or bool(self.relationship_changes)

    def summary(self) -> str:
        """Return all change descriptions, node changes first, one per line.

        When there are no changes a fixed "nothing changed" sentence is
        returned instead.
        """
        if not self.has_changes:
            return "No schema changes detected."
        every_change = [*self.node_changes, *self.relationship_changes]
        return "\n".join(change.description for change in every_change)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _diff_properties(
    old_props: tuple["PropertyIR", ...],
    new_props: tuple["PropertyIR", ...],
) -> list[PropertyChange]:
    """Compare two property collections, matching properties by name.

    Returns one ``PropertyChange`` per name that was added, removed, or
    whose old and new values compare unequal, ordered by property name.
    """
    before = {prop.name: prop for prop in old_props}
    after = {prop.name: prop for prop in new_props}
    found: list[PropertyChange] = []

    # Sorting the union of names keeps the output order deterministic.
    for prop_name in sorted(before.keys() | after.keys()):
        was = before.get(prop_name)
        now = after.get(prop_name)
        if was is None and now is not None:
            found.append(PropertyChange(kind="added", property_name=prop_name, new=now))
            continue
        if was is not None and now is None:
            found.append(PropertyChange(kind="removed", property_name=prop_name, old=was))
            continue
        if was != now:
            found.append(
                PropertyChange(kind="modified", property_name=prop_name, old=was, new=now)
            )

    return found
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def diff_snapshots(
    old: "SchemaSnapshot", new: "SchemaSnapshot"
) -> SchemaDiff:
    """Build the ``SchemaDiff`` that turns snapshot *old* into snapshot *new*.

    Nodes and relationships are matched by their ``kgid`` and visited in
    sorted kgid order.  An entry present on only one side becomes an
    "added"/"removed" change.  An entry present on both sides that compares
    unequal becomes a "modified" change only if its properties differ or,
    for nodes, its label differs; differences confined to other fields
    produce no entry.
    """
    result = SchemaDiff()

    # ---- nodes -----------------------------------------------------------
    nodes_before = old.nodes_by_kgid
    nodes_after = new.nodes_by_kgid
    for node_id in sorted({*nodes_before, *nodes_after}):
        prev_node = nodes_before.get(node_id)
        next_node = nodes_after.get(node_id)
        if prev_node is None and next_node is not None:
            result.node_changes.append(NodeChange(kind="added", kgid=node_id, new=next_node))
            continue
        if prev_node is not None and next_node is None:
            result.node_changes.append(NodeChange(kind="removed", kgid=node_id, old=prev_node))
            continue
        if prev_node != next_node:
            node_props = _diff_properties(prev_node.properties, next_node.properties)  # type: ignore[union-attr]
            renamed = prev_node.label != next_node.label  # type: ignore[union-attr]
            if node_props or renamed:
                result.node_changes.append(
                    NodeChange(
                        kind="modified",
                        kgid=node_id,
                        old=prev_node,
                        new=next_node,
                        property_changes=node_props,
                        label_renamed=renamed,
                    )
                )

    # ---- relationships ---------------------------------------------------
    rels_before = old.relationships_by_kgid
    rels_after = new.relationships_by_kgid
    for rel_id in sorted({*rels_before, *rels_after}):
        prev_rel = rels_before.get(rel_id)
        next_rel = rels_after.get(rel_id)
        if prev_rel is None and next_rel is not None:
            result.relationship_changes.append(
                RelationshipChange(kind="added", kgid=rel_id, new=next_rel)
            )
            continue
        if prev_rel is not None and next_rel is None:
            result.relationship_changes.append(
                RelationshipChange(kind="removed", kgid=rel_id, old=prev_rel)
            )
            continue
        if prev_rel != next_rel:
            rel_props = _diff_properties(prev_rel.properties, next_rel.properties)  # type: ignore[union-attr]
            if rel_props:
                result.relationship_changes.append(
                    RelationshipChange(
                        kind="modified",
                        kgid=rel_id,
                        old=prev_rel,
                        new=next_rel,
                        property_changes=rel_props,
                    )
                )

    return result
|
crochet/ir/hash.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Deterministic hashing for schema snapshots."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
from dataclasses import replace
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from crochet.ir.schema import SchemaSnapshot
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _canonical_json(snapshot: "SchemaSnapshot") -> str:
    """Serialise *snapshot* to a compact JSON string suitable for hashing.

    The ``created_at`` and ``schema_hash`` entries are dropped from the
    snapshot's dict form before encoding, so neither the creation time nor
    a previously stored hash influences the result.  Keys are sorted and
    the most compact separators are used.
    """
    payload = snapshot.to_dict()
    for volatile_key in ("created_at", "schema_hash"):
        payload.pop(volatile_key, None)
    return json.dumps(payload, separators=(",", ":"), sort_keys=True)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def compute_hash(snapshot: "SchemaSnapshot") -> str:
    """Return the hexadecimal SHA-256 digest of *snapshot*'s canonical JSON."""
    hasher = hashlib.sha256()
    hasher.update(_canonical_json(snapshot).encode("utf-8"))
    return hasher.hexdigest()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def hash_snapshot(snapshot: "SchemaSnapshot") -> "SchemaSnapshot":
    """Return a copy of *snapshot* whose ``schema_hash`` is its computed hash.

    The input snapshot is not modified; ``dataclasses.replace`` builds the
    new instance.
    """
    return replace(snapshot, schema_hash=compute_hash(snapshot))
|
crochet/ir/parser.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Parse neomodel Python files into schema IR without a Neo4j connection."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
import importlib.util
|
|
7
|
+
import inspect
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from neomodel import (
|
|
13
|
+
RelationshipFrom,
|
|
14
|
+
RelationshipTo,
|
|
15
|
+
StructuredNode,
|
|
16
|
+
StructuredRel,
|
|
17
|
+
)
|
|
18
|
+
from neomodel.properties import Property
|
|
19
|
+
|
|
20
|
+
from crochet.errors import DuplicateKGIDError, MissingKGIDError
|
|
21
|
+
from crochet.ir.hash import hash_snapshot
|
|
22
|
+
from crochet.ir.schema import (
|
|
23
|
+
NodeIR,
|
|
24
|
+
PropertyIR,
|
|
25
|
+
RelationshipDefIR,
|
|
26
|
+
RelationshipIR,
|
|
27
|
+
SchemaSnapshot,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# Recognised neomodel relationship classes mapped to the direction label
# recorded for them ("to", "from", "either").  Filled on first use.
_REL_MANAGERS: dict[type, str] = {}


def _init_rel_managers() -> None:
    """Fill ``_REL_MANAGERS`` the first time it is needed; later calls are no-ops."""
    if not _REL_MANAGERS:
        _REL_MANAGERS.update({RelationshipTo: "to", RelationshipFrom: "from"})
        try:
            from neomodel import Relationship
        except ImportError:
            # Undirected relationships are simply not recognised when this
            # name cannot be imported.
            return
        _REL_MANAGERS[Relationship] = "either"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _extract_property_ir(name: str, prop: Property) -> PropertyIR:
    """Build the ``PropertyIR`` describing neomodel property *prop* named *name*.

    The property type is the class name of *prop*.  ``required``,
    ``unique_index`` and ``index`` default to ``False`` when the attribute
    is absent.  Defaults are never captured (always ``None``), and
    ``choices`` becomes a tuple only when *prop* has a truthy ``choices``.
    """
    flags = {
        flag: getattr(prop, flag, False)
        for flag in ("required", "unique_index", "index")
    }
    has_choices = bool(getattr(prop, "choices", None))
    return PropertyIR(
        name=name,
        property_type=type(prop).__name__,
        default=None,  # we don't capture callables
        choices=tuple(prop.choices) if has_choices else None,
        **flags,
    )
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _extract_rel_def_ir(attr_name: str, rel_mgr: Any) -> RelationshipDefIR | None:
    """Turn a neomodel relationship definition into a ``RelationshipDefIR``.

    Args:
        attr_name: Name of the class attribute holding the definition.
        rel_mgr: The attribute value.  Anything whose type is not a subclass
            of a class registered in ``_REL_MANAGERS`` is rejected.

    Returns:
        The IR for the relationship, or ``None`` when *rel_mgr* is not a
        recognised relationship definition.
    """
    _init_rel_managers()
    mgr_type = type(rel_mgr)
    direction = next(
        (label for cls, label in _REL_MANAGERS.items() if issubclass(mgr_type, cls)),
        None,
    )
    if direction is None:
        return None

    definition = rel_mgr.definition
    rel_type = definition.get("relation_type", "RELATED_TO")

    # Resolve the target model label.
    # NOTE(review): neomodel appears to add "node_class" to the definition
    # only once the relationship has been resolved at runtime, so for a
    # freshly imported class it is usually missing -- confirm against the
    # installed neomodel version.  Fall back to the raw target (a class or a
    # class-name string) that was passed to the relationship constructor,
    # instead of recording every target as "UNKNOWN".
    target_cls = definition.get("node_class")
    if target_cls is None:
        target_cls = getattr(rel_mgr, "_raw_class", None)
    if isinstance(target_cls, type) and issubclass(target_cls, StructuredNode):
        target_label = getattr(target_cls, "__label__", target_cls.__name__)
    else:
        target_label = str(target_cls) if target_cls else "UNKNOWN"

    # A relationship model class contributes its __kgid__, when it has one.
    model_cls = definition.get("model")
    model_kgid = None
    if model_cls is not None and isinstance(model_cls, type):
        model_kgid = getattr(model_cls, "__kgid__", None)

    return RelationshipDefIR(
        attr_name=attr_name,
        rel_type=rel_type,
        target_label=target_label,
        direction=direction,
        model_kgid=model_kgid,
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _parse_node_class(cls: type) -> NodeIR:
    """Build the ``NodeIR`` for node class *cls*.

    Every public name reported by ``dir(cls)`` (which includes inherited
    names) is inspected: ``Property`` instances become properties, and
    anything ``_extract_rel_def_ir`` recognises becomes a relationship
    definition.  Attributes that raise on access are skipped.

    Raises:
        MissingKGIDError: if *cls* has no ``__kgid__`` (or it is ``None``).
    """
    kgid = getattr(cls, "__kgid__", None)
    if kgid is None:
        raise MissingKGIDError(cls.__name__)

    found_props: list[PropertyIR] = []
    found_rels: list[RelationshipDefIR] = []

    for attr_name in dir(cls):
        if attr_name.startswith("_"):
            continue
        try:
            value = getattr(cls, attr_name)
        except Exception:
            # Some class attributes cannot be read without side effects or
            # a live connection; they are ignored.
            continue

        if isinstance(value, Property):
            found_props.append(_extract_property_ir(attr_name, value))
            continue
        rel_def = _extract_rel_def_ir(attr_name, value)
        if rel_def is not None:
            found_rels.append(rel_def)

    return NodeIR(
        kgid=kgid,
        label=getattr(cls, "__label__", cls.__name__),
        class_name=cls.__name__,
        module_path=cls.__module__,
        properties=tuple(sorted(found_props)),
        relationship_defs=tuple(found_rels),
    )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _parse_rel_class(cls: type) -> RelationshipIR:
    """Build the ``RelationshipIR`` for relationship class *cls*.

    The relationship type is the class's ``__type__`` attribute, falling
    back to the upper-cased class name.  Every public name reported by
    ``dir(cls)`` that resolves to a ``Property`` instance becomes a
    property; attributes that raise on access are skipped.

    Raises:
        MissingKGIDError: if *cls* has no ``__kgid__`` (or it is ``None``).
    """
    kgid = getattr(cls, "__kgid__", None)
    if kgid is None:
        raise MissingKGIDError(cls.__name__)

    found_props: list[PropertyIR] = []
    for attr_name in dir(cls):
        if attr_name.startswith("_"):
            continue
        try:
            value = getattr(cls, attr_name)
        except Exception:
            continue
        if isinstance(value, Property):
            found_props.append(_extract_property_ir(attr_name, value))

    return RelationshipIR(
        kgid=kgid,
        rel_type=getattr(cls, "__type__", cls.__name__.upper()),
        class_name=cls.__name__,
        module_path=cls.__module__,
        properties=tuple(sorted(found_props)),
    )
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _clear_neomodel_registry() -> None:
    """Empty neomodel's class registries on ``neomodel.db``, if they exist.

    Best effort: a missing neomodel import or a missing attribute is
    silently ignored.
    """
    try:
        from neomodel import db

        for registry_name in ("_NODE_CLASS_REGISTRY", "_DB_SPECIFIC_CLASS_REGISTRY"):
            if hasattr(db, registry_name):
                getattr(db, registry_name).clear()
    except (ImportError, AttributeError):
        pass
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# Incremented on every load so each imported file gets a distinct module name.
_load_counter = 0


def _load_module_from_path(file_path: Path) -> Any:
    """Import the Python file at *file_path* as a freshly named module.

    Every call registers the module in ``sys.modules`` under a new unique
    name (``crochet._user_models._<counter>_<file stem>``), so loading the
    same file twice yields two independent module objects.  The original
    author's stated intent is to avoid neomodel's ``NodeClassAlreadyDefined``
    on repeated parses.

    Args:
        file_path: Location of the ``.py`` file to execute.

    Returns:
        The executed module, or ``None`` when no import spec/loader can be
        created for *file_path*.

    Raises:
        BaseException: whatever executing the file raises is propagated
            unchanged, after the half-initialised module has been removed
            from ``sys.modules``.
    """
    global _load_counter
    _load_counter += 1
    module_name = f"crochet._user_models._{_load_counter}_{file_path.stem}"
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        return None
    module = importlib.util.module_from_spec(spec)
    # Register before executing so code in the file that looks itself up in
    # sys.modules can find it.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Never leave a partially executed module registered -- this also
        # covers SystemExit/KeyboardInterrupt raised by the file.  pop()
        # cannot mask the real error if the entry is already gone.
        sys.modules.pop(module_name, None)
        raise
    return module
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def parse_module(module: Any) -> tuple[list[NodeIR], list[RelationshipIR]]:
    """Collect the IR of every node and relationship class defined in *module*.

    Only classes whose ``__module__`` equals the module's own name are
    considered, so classes merely imported into *module* are ignored, as are
    the ``StructuredNode`` / ``StructuredRel`` base classes themselves.

    Returns:
        A ``(nodes, relationships)`` pair of lists.
    """
    node_irs: list[NodeIR] = []
    rel_irs: list[RelationshipIR] = []
    own_name = module.__name__

    for _unused, candidate in inspect.getmembers(module, inspect.isclass):
        if candidate.__module__ != own_name:
            continue  # defined elsewhere and only imported here
        if candidate is not StructuredNode and issubclass(candidate, StructuredNode):
            node_irs.append(_parse_node_class(candidate))
        elif candidate is not StructuredRel and issubclass(candidate, StructuredRel):
            rel_irs.append(_parse_rel_class(candidate))

    return node_irs, rel_irs
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def parse_models_directory(models_dir: Path) -> SchemaSnapshot:
    """Parse every ``.py`` file under *models_dir* into one hashed snapshot.

    Files are discovered recursively and processed in sorted path order;
    files whose name starts with an underscore are skipped.  A kgid may be
    used only once across all nodes and relationships.

    Raises:
        DuplicateKGIDError: when two parsed classes share a kgid.
    """
    _clear_neomodel_registry()

    collected_nodes: list[NodeIR] = []
    collected_rels: list[RelationshipIR] = []
    owner_by_kgid: dict[str, str] = {}  # kgid -> class name that claimed it

    for source_file in sorted(models_dir.rglob("*.py")):
        if source_file.name.startswith("_"):
            continue
        loaded = _load_module_from_path(source_file)
        if loaded is None:
            continue
        file_nodes, file_rels = parse_module(loaded)

        # Nodes of a file are registered before its relationships; both
        # share the same kgid namespace.
        for parsed, bucket in (
            *((node, collected_nodes) for node in file_nodes),
            *((rel, collected_rels) for rel in file_rels),
        ):
            if parsed.kgid in owner_by_kgid:
                raise DuplicateKGIDError(
                    parsed.kgid, owner_by_kgid[parsed.kgid], parsed.class_name
                )
            owner_by_kgid[parsed.kgid] = parsed.class_name
            bucket.append(parsed)

    return hash_snapshot(
        SchemaSnapshot(nodes=tuple(collected_nodes), relationships=tuple(collected_rels))
    )
|
crochet/ir/schema.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Data structures for the schema Intermediate Representation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True, order=True)
class PropertyIR:
    """Immutable, orderable description of one property of a node or relationship."""

    name: str
    property_type: str  # e.g. "StringProperty", "IntegerProperty"
    required: bool = False
    unique_index: bool = False
    index: bool = False
    default: Any = None
    choices: tuple | None = None

    def to_dict(self) -> dict:
        """Return a plain-dict form of this property.

        ``default`` (as its ``repr``) and ``choices`` (as a list) are only
        included when they are not ``None``.
        """
        always_present = ("name", "property_type", "required", "unique_index", "index")
        payload: dict[str, Any] = {key: getattr(self, key) for key in always_present}
        if self.default is not None:
            payload["default"] = repr(self.default)
        if self.choices is not None:
            payload["choices"] = list(self.choices)
        return payload

    @classmethod
    def from_dict(cls, d: dict) -> PropertyIR:
        """Build a ``PropertyIR`` from the dict form produced by ``to_dict``.

        ``name`` and ``property_type`` are mandatory keys; everything else
        falls back to the field default.  A missing or empty ``choices``
        entry yields ``None``.
        """
        raw_choices = d.get("choices")
        return cls(
            name=d["name"],
            property_type=d["property_type"],
            required=d.get("required", False),
            unique_index=d.get("unique_index", False),
            index=d.get("index", False),
            default=d.get("default"),
            choices=tuple(raw_choices) if raw_choices else None,
        )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass(frozen=True)
class RelationshipDefIR:
    """Immutable description of a relationship declared on a node class."""

    attr_name: str
    rel_type: str  # Neo4j relationship type string, e.g. "FRIENDS_WITH"
    target_label: str  # target node class name or label
    direction: str  # "to", "from", "either"
    model_kgid: str | None = None  # __kgid__ of the StructuredRel model, if any

    def to_dict(self) -> dict:
        """Return a plain-dict form; ``model_kgid`` is omitted when ``None``."""
        payload: dict[str, Any] = {
            key: getattr(self, key)
            for key in ("attr_name", "rel_type", "target_label", "direction")
        }
        if self.model_kgid is not None:
            payload["model_kgid"] = self.model_kgid
        return payload

    @classmethod
    def from_dict(cls, d: dict) -> RelationshipDefIR:
        """Build an instance from the dict form produced by ``to_dict``.

        All keys except ``model_kgid`` are mandatory.
        """
        mandatory = {
            key: d[key] for key in ("attr_name", "rel_type", "target_label", "direction")
        }
        return cls(model_kgid=d.get("model_kgid"), **mandatory)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass(frozen=True)
class NodeIR:
    """Immutable description of one node model class."""

    kgid: str
    label: str  # Neo4j label (defaults to class name)
    class_name: str
    module_path: str
    properties: tuple[PropertyIR, ...] = ()
    relationship_defs: tuple[RelationshipDefIR, ...] = ()

    def to_dict(self) -> dict:
        """Return a plain-dict form; properties are emitted in sorted order."""
        serialised_props = [prop.to_dict() for prop in sorted(self.properties)]
        serialised_rels = [rel.to_dict() for rel in self.relationship_defs]
        return {
            "kgid": self.kgid,
            "label": self.label,
            "class_name": self.class_name,
            "module_path": self.module_path,
            "properties": serialised_props,
            "relationship_defs": serialised_rels,
        }

    @classmethod
    def from_dict(cls, d: dict) -> NodeIR:
        """Build an instance from the dict form produced by ``to_dict``.

        ``properties`` and ``relationship_defs`` may be absent, in which
        case they default to empty tuples.
        """
        props = tuple(PropertyIR.from_dict(item) for item in d.get("properties", []))
        rels = tuple(
            RelationshipDefIR.from_dict(item) for item in d.get("relationship_defs", [])
        )
        return cls(
            kgid=d["kgid"],
            label=d["label"],
            class_name=d["class_name"],
            module_path=d["module_path"],
            properties=props,
            relationship_defs=rels,
        )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass(frozen=True)
class RelationshipIR:
    """Immutable description of one relationship model class."""

    kgid: str
    rel_type: str  # Neo4j relationship type string
    class_name: str
    module_path: str
    properties: tuple[PropertyIR, ...] = ()

    def to_dict(self) -> dict:
        """Return a plain-dict form; properties are emitted in sorted order."""
        serialised_props = [prop.to_dict() for prop in sorted(self.properties)]
        return {
            "kgid": self.kgid,
            "rel_type": self.rel_type,
            "class_name": self.class_name,
            "module_path": self.module_path,
            "properties": serialised_props,
        }

    @classmethod
    def from_dict(cls, d: dict) -> RelationshipIR:
        """Build an instance from the dict form produced by ``to_dict``.

        ``properties`` may be absent, in which case it defaults to an empty
        tuple.
        """
        props = tuple(PropertyIR.from_dict(item) for item in d.get("properties", []))
        return cls(
            kgid=d["kgid"],
            rel_type=d["rel_type"],
            class_name=d["class_name"],
            module_path=d["module_path"],
            properties=props,
        )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@dataclass(frozen=True)
class SchemaSnapshot:
    """Immutable capture of all node and relationship IR at one moment.

    ``created_at`` defaults to the current UTC time in ISO-8601 form and
    ``schema_hash`` to the empty string.
    """

    nodes: tuple[NodeIR, ...]
    relationships: tuple[RelationshipIR, ...]
    created_at: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    schema_hash: str = ""

    @property
    def nodes_by_kgid(self) -> dict[str, NodeIR]:
        """Return a new dict mapping each node's kgid to the node."""
        return {node.kgid: node for node in self.nodes}

    @property
    def relationships_by_kgid(self) -> dict[str, RelationshipIR]:
        """Return a new dict mapping each relationship's kgid to the relationship."""
        return {rel.kgid: rel for rel in self.relationships}

    def to_dict(self) -> dict:
        """Return a plain-dict form with nodes and relationships sorted by kgid."""
        ordered_nodes = sorted(self.nodes, key=lambda node: node.kgid)
        ordered_rels = sorted(self.relationships, key=lambda rel: rel.kgid)
        return {
            "nodes": [node.to_dict() for node in ordered_nodes],
            "relationships": [rel.to_dict() for rel in ordered_rels],
            "created_at": self.created_at,
            "schema_hash": self.schema_hash,
        }

    def to_json(self, indent: int = 2) -> str:
        """Return the dict form as JSON text with sorted keys."""
        return json.dumps(self.to_dict(), sort_keys=True, indent=indent)

    @classmethod
    def from_dict(cls, d: dict) -> SchemaSnapshot:
        """Build a snapshot from the dict form produced by ``to_dict``.

        Every key is optional: missing collections become empty tuples and
        missing ``created_at`` / ``schema_hash`` become empty strings.
        """
        parsed_nodes = tuple(NodeIR.from_dict(item) for item in d.get("nodes", []))
        parsed_rels = tuple(
            RelationshipIR.from_dict(item) for item in d.get("relationships", [])
        )
        return cls(
            nodes=parsed_nodes,
            relationships=parsed_rels,
            created_at=d.get("created_at", ""),
            schema_hash=d.get("schema_hash", ""),
        )

    @classmethod
    def from_json(cls, raw: str) -> SchemaSnapshot:
        """Build a snapshot from JSON text produced by ``to_json``."""
        decoded = json.loads(raw)
        return cls.from_dict(decoded)

    @classmethod
    def empty(cls) -> SchemaSnapshot:
        """Return a snapshot containing no nodes and no relationships."""
        return cls(nodes=(), relationships=())
|