crochet-migration 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ """DDL and data operations available inside migration upgrade/downgrade functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+
10
@dataclass
class Operation:
    """A single recorded operation for audit purposes."""

    # Short machine-readable operation name, e.g. "add_index" or "run_cypher".
    op_type: str
    # Operation arguments, usually including the generated Cypher under "cypher".
    details: dict[str, Any]
16
+
17
+
18
class MigrationContext:
    """Context object passed to upgrade() and downgrade() functions.

    Wraps Neo4j operations and records everything for auditability.
    When *dry_run* is ``True`` operations are recorded but not executed.
    """

    def __init__(self, driver: Any | None = None, dry_run: bool = False) -> None:
        # Neo4j driver; when None, every operation is record-only.
        self._driver = driver
        # Record-only mode: Cypher is never sent to the database when True.
        self._dry_run = dry_run
        # Ordered audit trail of every operation requested via this context.
        self.operations: list[Operation] = []
        # Active data-ingest batch id; None until begin_batch() is called.
        self._batch_id: str | None = None

    # ------------------------------------------------------------------
    # Constraints
    # ------------------------------------------------------------------

    def add_unique_constraint(self, label: str, property_name: str) -> None:
        """CREATE CONSTRAINT … REQUIRE (n.prop) IS UNIQUE."""
        # Deterministic name so the matching drop_* call can find it later.
        constraint_name = f"crochet_uniq_{label}_{property_name}"
        # NOTE(review): label/property are interpolated directly into the
        # Cypher text (identifiers cannot be parameterized in Cypher) —
        # safe only if they come from trusted model code, not user input.
        cypher = (
            f"CREATE CONSTRAINT {constraint_name} IF NOT EXISTS "
            f"FOR (n:{label}) REQUIRE n.{property_name} IS UNIQUE"
        )
        self._record_and_run("add_unique_constraint", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def drop_unique_constraint(self, label: str, property_name: str) -> None:
        """Drop the uniqueness constraint created by add_unique_constraint()."""
        constraint_name = f"crochet_uniq_{label}_{property_name}"
        cypher = f"DROP CONSTRAINT {constraint_name} IF EXISTS"
        self._record_and_run("drop_unique_constraint", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def add_node_property_existence_constraint(
        self, label: str, property_name: str
    ) -> None:
        """CREATE CONSTRAINT … REQUIRE (n.prop) IS NOT NULL."""
        constraint_name = f"crochet_exists_{label}_{property_name}"
        cypher = (
            f"CREATE CONSTRAINT {constraint_name} IF NOT EXISTS "
            f"FOR (n:{label}) REQUIRE n.{property_name} IS NOT NULL"
        )
        self._record_and_run("add_existence_constraint", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def drop_node_property_existence_constraint(
        self, label: str, property_name: str
    ) -> None:
        """Drop the existence constraint created by the matching add_* call."""
        constraint_name = f"crochet_exists_{label}_{property_name}"
        cypher = f"DROP CONSTRAINT {constraint_name} IF EXISTS"
        self._record_and_run("drop_existence_constraint", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    # ------------------------------------------------------------------
    # Indexes
    # ------------------------------------------------------------------

    def add_index(self, label: str, property_name: str) -> None:
        """Create a single-property index on (label, property_name)."""
        index_name = f"crochet_idx_{label}_{property_name}"
        cypher = (
            f"CREATE INDEX {index_name} IF NOT EXISTS "
            f"FOR (n:{label}) ON (n.{property_name})"
        )
        self._record_and_run("add_index", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def drop_index(self, label: str, property_name: str) -> None:
        """Drop the index created by add_index()."""
        index_name = f"crochet_idx_{label}_{property_name}"
        cypher = f"DROP INDEX {index_name} IF EXISTS"
        self._record_and_run("drop_index", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    # ------------------------------------------------------------------
    # Labels / Relationship types
    # ------------------------------------------------------------------

    def rename_label(self, old_label: str, new_label: str) -> None:
        """Move every node from *old_label* to *new_label* in one query."""
        cypher = (
            f"MATCH (n:{old_label}) "
            f"SET n:{new_label} REMOVE n:{old_label}"
        )
        self._record_and_run("rename_label", {
            "old_label": old_label, "new_label": new_label, "cypher": cypher,
        })

    def rename_relationship_type(self, old_type: str, new_type: str) -> None:
        """Recreate every *old_type* relationship under *new_type*.

        Neo4j cannot rename a relationship type in place, so each one is
        copied (including its properties) and the original deleted.
        """
        cypher = (
            f"MATCH (a)-[r:{old_type}]->(b) "
            f"CREATE (a)-[r2:{new_type}]->(b) "
            f"SET r2 = properties(r) "
            f"DELETE r"
        )
        self._record_and_run("rename_relationship_type", {
            "old_type": old_type, "new_type": new_type, "cypher": cypher,
        })

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    def add_node_property(
        self, label: str, property_name: str, default: Any = None
    ) -> None:
        """Declare a new node property, optionally backfilling a default.

        NOTE(review): when *default* is given, the SET runs on ALL nodes
        of the label, overwriting any pre-existing values — confirm this
        is intended before using on populated properties.
        """
        if default is not None:
            cypher = f"MATCH (n:{label}) SET n.{property_name} = $default"
            params = {"default": default}
        else:
            # Without a default there is nothing to execute; the property
            # addition is recorded purely for the audit trail.
            cypher = None
            params = None
        self._record_and_run("add_node_property", {
            "label": label, "property": property_name,
            "default": default, "cypher": cypher,
        }, params=params)

    def remove_node_property(self, label: str, property_name: str) -> None:
        """Remove a property from every node carrying *label*."""
        cypher = f"MATCH (n:{label}) REMOVE n.{property_name}"
        self._record_and_run("remove_node_property", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def rename_node_property(
        self, label: str, old_name: str, new_name: str
    ) -> None:
        """Copy *old_name* into *new_name* on every node, then drop the old key."""
        cypher = (
            f"MATCH (n:{label}) "
            f"SET n.{new_name} = n.{old_name} "
            f"REMOVE n.{old_name}"
        )
        self._record_and_run("rename_node_property", {
            "label": label, "old_name": old_name, "new_name": new_name,
            "cypher": cypher,
        })

    # ------------------------------------------------------------------
    # Raw Cypher (escape hatch)
    # ------------------------------------------------------------------

    def run_cypher(self, cypher: str, params: dict | None = None) -> Any:
        """Execute arbitrary Cypher — use sparingly."""
        # cypher_override ensures the exact text given here is executed,
        # independent of what lands in the recorded details.
        return self._record_and_run("run_cypher", {
            "cypher": cypher, "params": params,
        }, params=params, cypher_override=cypher)

    # ------------------------------------------------------------------
    # Data ingest helpers
    # ------------------------------------------------------------------

    def begin_batch(self, batch_id: str | None = None) -> str:
        """Start a data-ingest batch. Returns the batch ID."""
        # Caller-supplied id wins; otherwise a short random hex id is minted.
        self._batch_id = batch_id or uuid.uuid4().hex[:12]
        self._record_and_run("begin_batch", {"batch_id": self._batch_id})
        return self._batch_id

    @property
    def batch_id(self) -> str | None:
        # Read-only view of the current batch id (None when no batch is open).
        return self._batch_id

    def create_nodes(
        self, label: str, data: list[dict[str, Any]]
    ) -> int:
        """Create nodes from a list of property dictionaries.

        Each node is tagged with ``_crochet_batch`` for rollback.
        """
        if not data:
            return 0
        # Nodes created outside an explicit batch are tagged "untracked".
        batch = self._batch_id or "untracked"
        cypher = (
            f"UNWIND $rows AS row "
            f"CREATE (n:{label}) SET n = row, n._crochet_batch = $batch"
        )
        self._record_and_run("create_nodes", {
            "label": label, "count": len(data), "cypher": cypher,
        }, params={"rows": data, "batch": batch}, cypher_override=cypher)
        return len(data)

    def create_relationships(
        self,
        source_label: str,
        target_label: str,
        rel_type: str,
        data: list[dict[str, Any]],
        source_key: str = "source_id",
        target_key: str = "target_id",
        properties_key: str = "properties",
    ) -> int:
        """Create relationships from structured data rows.

        Each row must contain *source_key* and *target_key* values, and
        optionally a *properties_key* dict.

        NOTE(review): endpoints are matched on a literal ``id`` property
        of the source/target nodes — confirm ingested nodes carry ``id``.
        """
        if not data:
            return 0
        batch = self._batch_id or "untracked"
        cypher = (
            f"UNWIND $rows AS row "
            f"MATCH (a:{source_label} {{id: row.{source_key}}}) "
            f"MATCH (b:{target_label} {{id: row.{target_key}}}) "
            f"CREATE (a)-[r:{rel_type}]->(b) "
            f"SET r = row.{properties_key}, r._crochet_batch = $batch"
        )
        self._record_and_run("create_relationships", {
            "source_label": source_label, "target_label": target_label,
            "rel_type": rel_type, "count": len(data), "cypher": cypher,
        }, params={"rows": data, "batch": batch}, cypher_override=cypher)
        return len(data)

    def delete_nodes_by_batch(self, label: str, batch_id: str) -> None:
        """Delete all nodes of a label that belong to a batch."""
        cypher = (
            f"MATCH (n:{label} {{_crochet_batch: $batch}}) DETACH DELETE n"
        )
        self._record_and_run("delete_nodes_by_batch", {
            "label": label, "batch_id": batch_id, "cypher": cypher,
        }, params={"batch": batch_id}, cypher_override=cypher)

    def delete_relationships_by_batch(self, rel_type: str, batch_id: str) -> None:
        """Delete all relationships of a type that belong to a batch."""
        # Undirected pattern so direction does not matter for cleanup.
        cypher = (
            f"MATCH ()-[r:{rel_type} {{_crochet_batch: $batch}}]-() DELETE r"
        )
        self._record_and_run("delete_relationships_by_batch", {
            "rel_type": rel_type, "batch_id": batch_id, "cypher": cypher,
        }, params={"batch": batch_id}, cypher_override=cypher)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _record_and_run(
        self,
        op_type: str,
        details: dict[str, Any],
        params: dict | None = None,
        cypher_override: str | None = None,
    ) -> Any:
        """Append an Operation to the audit log and, unless dry-running
        (or no driver is configured), execute its Cypher.

        Returns the driver result summary, or None when nothing was run.
        """
        self.operations.append(Operation(op_type=op_type, details=details))
        if self._dry_run or self._driver is None:
            return None
        # Prefer an explicit override; fall back to the Cypher recorded in
        # the details (may be absent, e.g. add_node_property without default).
        cypher = cypher_override or details.get("cypher")
        if cypher:
            with self._driver.session() as session:
                # Parameters are expanded as keyword arguments to run().
                result = session.run(cypher, **(params or {}))
                return result.consume()
        return None
@@ -0,0 +1,105 @@
1
+ """Migration file scaffolding and template generation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+
9
# Template for generated migration modules. Placeholders are filled by
# render_migration(): {parent_id_repr} must be valid Python (None or a
# quoted string), while {parent_id} is the human-readable docstring form.
# {upgrade_body}/{downgrade_body} arrive pre-indented (4 spaces per line).
_MIGRATION_TEMPLATE = '''\
"""
{description}

Revision: {revision_id}
Parent: {parent_id}
Created: {created_at}
Schema: {schema_hash}
"""

from crochet.migrations.operations import MigrationContext

# -- Migration metadata --------------------------------------------------

revision_id = "{revision_id}"
parent_id = {parent_id_repr}
schema_hash = "{schema_hash}"
rollback_safe = {rollback_safe}


def upgrade(ctx: MigrationContext) -> None:
    """Apply this migration."""
{upgrade_body}


def downgrade(ctx: MigrationContext) -> None:
    """Revert this migration."""
{downgrade_body}
'''

# Comment header prepended to generated upgrade/downgrade bodies when a
# schema-diff summary is available (indented to function-body depth).
_DIFF_COMMENT_HEADER = "    # Detected schema changes:\n"
40
+
41
+
42
def slugify(text: str) -> str:
    """Return a filesystem-safe slug derived from *text*.

    Lowercases, collapses every non-alphanumeric run into a single
    underscore, trims edge underscores, and caps the result at 60 chars.
    """
    normalized = re.sub(r"[^a-z0-9]+", "_", text.strip().lower())
    return normalized.strip("_")[:60]


def generate_revision_id(seq: int, description: str) -> str:
    """Generate a revision id like ``0001_initial``.

    The sequence number is zero-padded to four digits and joined to the
    slugified description with an underscore.
    """
    return f"{seq:04d}_{slugify(description)}"
53
+
54
+
55
def render_migration(
    revision_id: str,
    parent_id: str | None,
    description: str,
    schema_hash: str,
    rollback_safe: bool = True,
    diff_summary: str = "",
) -> str:
    """Render a migration file from template.

    When *diff_summary* is non-empty, each of its lines becomes an
    indented comment in both the upgrade and downgrade bodies; otherwise
    the bodies are a bare ``pass``.
    """
    created_at = datetime.now(timezone.utc).isoformat()

    if diff_summary:
        commented = "".join(
            f"    # {line}\n" for line in diff_summary.splitlines()
        )
        body = _DIFF_COMMENT_HEADER + commented + "    pass"
    else:
        body = "    pass"

    return _MIGRATION_TEMPLATE.format(
        description=description,
        revision_id=revision_id,
        parent_id=parent_id or "None",
        parent_id_repr=repr(parent_id),
        created_at=created_at,
        schema_hash=schema_hash,
        rollback_safe=rollback_safe,
        # Upgrade and downgrade share the same generated body.
        upgrade_body=body,
        downgrade_body=body,
    )
87
+
88
+
89
def write_migration_file(
    migrations_dir: Path,
    revision_id: str,
    content: str,
) -> Path:
    """Write a migration file to disk and return the path.

    Creates *migrations_dir* (and parents) if needed and guarantees an
    ``__init__.py`` so the directory is importable as a package.
    """
    migrations_dir.mkdir(parents=True, exist_ok=True)

    # Package marker — created once, never overwritten.
    package_marker = migrations_dir / "__init__.py"
    if not package_marker.exists():
        package_marker.write_text("")

    target = migrations_dir / f"{revision_id}.py"
    target.write_text(content)
    return target
@@ -0,0 +1,6 @@
1
+ """Scaffolding helpers for neomodel node and relationship models."""
2
+
3
+ from crochet.scaffold.node import scaffold_node
4
+ from crochet.scaffold.relationship import scaffold_relationship
5
+
6
+ __all__ = ["scaffold_node", "scaffold_relationship"]
@@ -0,0 +1,48 @@
1
+ """Scaffold a new neomodel StructuredNode file with an immutable __kgid__."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from pathlib import Path
7
+
8
# Template for a generated StructuredNode module; {class_name} and {kgid}
# are filled in by scaffold_node().
_NODE_TEMPLATE = '''\
"""Node model: {class_name}"""

from neomodel import StructuredNode, StringProperty


class {class_name}(StructuredNode):
    """Graph node representing a {class_name}.

    The __kgid__ is an immutable identifier for this model's schema identity.
    It must never change, even if the class or file is renamed.
    """

    __kgid__ = "{kgid}"

    # -- Properties --
    name = StringProperty(required=True, unique_index=True)
'''


def scaffold_node(
    models_dir: Path,
    class_name: str,
    kgid: str | None = None,
    filename: str | None = None,
) -> Path:
    """Write a new node model file and return the path.

    Creates *models_dir* (with an ``__init__.py`` package marker) if
    needed. A missing *kgid* is generated from the lowercase class name
    plus a short random hex suffix; a missing *filename* defaults to
    ``<classname>.py``.
    """
    models_dir.mkdir(parents=True, exist_ok=True)

    # Guarantee the models directory is an importable package.
    package_marker = models_dir / "__init__.py"
    if not package_marker.exists():
        package_marker.write_text("")

    if kgid is None:
        kgid = f"{class_name.lower()}_{uuid.uuid4().hex[:8]}"
    target = models_dir / (filename or f"{class_name.lower()}.py")

    target.write_text(_NODE_TEMPLATE.format(class_name=class_name, kgid=kgid))
    return target
@@ -0,0 +1,52 @@
1
+ """Scaffold a new neomodel StructuredRel file with an immutable __kgid__."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from pathlib import Path
7
+
8
# Template for a generated StructuredRel module; {class_name}, {kgid} and
# {rel_type} are filled in by scaffold_relationship().
_REL_TEMPLATE = '''\
"""Relationship model: {class_name}"""

from neomodel import StructuredRel, StringProperty


class {class_name}(StructuredRel):
    """Graph relationship representing a {class_name}.

    The __kgid__ is an immutable identifier for this model's schema identity.
    It must never change, even if the class or file is renamed.
    """

    __kgid__ = "{kgid}"
    __type__ = "{rel_type}"

    # -- Properties --
'''


def scaffold_relationship(
    models_dir: Path,
    class_name: str,
    rel_type: str | None = None,
    kgid: str | None = None,
    filename: str | None = None,
) -> Path:
    """Write a new relationship model file and return the path.

    Creates *models_dir* (with an ``__init__.py`` package marker) if
    needed. Defaults: *rel_type* is the uppercased class name, *kgid* is
    the lowercase class name plus a random hex suffix, *filename* is
    ``<classname>.py``.
    """
    models_dir.mkdir(parents=True, exist_ok=True)

    # Guarantee the models directory is an importable package.
    package_marker = models_dir / "__init__.py"
    if not package_marker.exists():
        package_marker.write_text("")

    if kgid is None:
        kgid = f"{class_name.lower()}_{uuid.uuid4().hex[:8]}"
    if rel_type is None:
        rel_type = class_name.upper()
    target = models_dir / (filename or f"{class_name.lower()}.py")

    rendered = _REL_TEMPLATE.format(
        class_name=class_name, kgid=kgid, rel_type=rel_type
    )
    target.write_text(rendered)
    return target
crochet/verify.py ADDED
@@ -0,0 +1,141 @@
1
+ """Verification logic — ensure ledger, migrations, and graph agree."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ from crochet.config import CrochetConfig
9
+ from crochet.errors import VerificationError
10
+ from crochet.ledger.sqlite import Ledger
11
+ from crochet.migrations.engine import MigrationEngine
12
+
13
+
14
@dataclass
class VerificationReport:
    """Result of a verification run."""

    # Individual check outcomes, in execution order.
    checks: list[CheckResult] = field(default_factory=list)

    @property
    def passed(self) -> bool:
        """True when every recorded check passed (vacuously True if empty)."""
        return all(check.passed for check in self.checks)

    def summary(self) -> str:
        """Render one ``[PASS]``/``[FAIL]`` line per check, with any
        detail lines indented beneath it."""
        out: list[str] = []
        for check in self.checks:
            status = "PASS" if check.passed else "FAIL"
            out.append(f"[{status}] {check.name}")
            # Details are optional; indent each under its check line.
            out.extend(f"    {detail}" for detail in check.details or [])
        return "\n".join(out)
33
+
34
+
35
@dataclass
class CheckResult:
    """Outcome of a single verification check."""

    # Human-readable check name shown in the summary line.
    name: str
    # Whether the check succeeded.
    passed: bool
    # Optional explanatory lines (failure reasons, pending revision ids, ...).
    details: list[str] = field(default_factory=list)
40
+
41
+
42
def verify_project(
    config: CrochetConfig,
    ledger: Ledger,
    driver: Any | None = None,
) -> VerificationReport:
    """Run all verification checks and return a report.

    Checks: ledger chain integrity, migration files present, no pending
    migrations, schema-hash consistency, and (when *driver* is given)
    Neo4j connectivity.
    """
    report = VerificationReport()

    # 1. Ledger chain integrity comes first — everything else trusts it.
    report.checks.append(_check_ledger_chain(ledger))

    # 2–4. Engine-backed checks share one MigrationEngine instance.
    engine = MigrationEngine(config, ledger)
    for result in (
        _check_migration_files_match_ledger(engine, ledger),
        _check_no_pending(engine),
        _check_schema_hashes(engine, ledger),
    ):
        report.checks.append(result)

    # 5. Database reachability is only checked when a driver was supplied.
    if driver is not None:
        report.checks.append(_check_neo4j_connectivity(driver))

    return report
68
+
69
+
70
def _check_ledger_chain(ledger: Ledger) -> CheckResult:
    """Validate the ledger's hash chain; any reported issue fails the check."""
    problems = ledger.verify_chain()
    return CheckResult(
        name="Ledger chain integrity",
        passed=not problems,
        details=list(problems),
    )
79
+
80
+
81
def _check_migration_files_match_ledger(
    engine: MigrationEngine, ledger: Ledger
) -> CheckResult:
    """Every applied migration in the ledger must have a corresponding file."""
    on_disk = {mf.revision_id for mf in engine.discover_migrations()}
    missing = [
        f"Ledger references '{applied.revision_id}' but no file found."
        for applied in ledger.get_applied_migrations()
        if applied.revision_id not in on_disk
    ]
    return CheckResult(
        name="Migration files present",
        passed=not missing,
        details=missing,
    )
98
+
99
+
100
def _check_no_pending(engine: MigrationEngine) -> CheckResult:
    """Fail when migrations exist on disk that have not yet been applied."""
    outstanding = engine.pending_migrations()
    return CheckResult(
        name="No pending migrations",
        passed=not outstanding,
        details=[f"Pending: {mig.revision_id}" for mig in outstanding],
    )
109
+
110
+
111
def _check_schema_hashes(engine: MigrationEngine, ledger: Ledger) -> CheckResult:
    """Check that schema hashes in migration files match the ledger.

    A migration is compared only when it appears in the ledger and the
    file declares a schema hash. A missing/empty ledger hash is reported
    as a mismatch rather than crashing.
    """
    applied = {m.revision_id: m for m in ledger.get_applied_migrations()}
    issues: list[str] = []
    for mf in engine.discover_migrations():
        am = applied.get(mf.revision_id)
        if not (am and mf.schema_hash and am.schema_hash != mf.schema_hash):
            continue
        # Bug fix: am.schema_hash may be None (or empty) while still
        # differing from the file hash; slicing None raised TypeError
        # before. Fall back to an empty string for display.
        ledger_hash = am.schema_hash or ""
        issues.append(
            f"Hash mismatch for '{mf.revision_id}': "
            f"file={mf.schema_hash[:12]}… ledger={ledger_hash[:12]}…"
        )
    if issues:
        return CheckResult(
            name="Schema hash consistency",
            passed=False,
            details=issues,
        )
    return CheckResult(name="Schema hash consistency", passed=True)
129
+
130
+
131
def _check_neo4j_connectivity(driver: Any) -> CheckResult:
    """Probe the database with a trivial query to confirm reachability."""
    try:
        with driver.session() as session:
            session.run("RETURN 1")
    except Exception as exc:  # broad by design: any failure means unreachable
        return CheckResult(
            name="Neo4j connectivity",
            passed=False,
            details=[str(exc)],
        )
    return CheckResult(name="Neo4j connectivity", passed=True)