crochet-migration 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ """DDL and data operations available inside migration upgrade/downgrade functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+
10
@dataclass
class Operation:
    """A single recorded operation for audit purposes."""

    # Short machine-readable operation name, e.g. "add_index" or "run_cypher".
    op_type: str
    # Operation arguments, usually including the generated Cypher under "cypher".
    details: dict[str, Any]
16
+
17
+
18
class MigrationContext:
    """Context object passed to upgrade() and downgrade() functions.

    Wraps Neo4j operations and records everything for auditability.
    When *dry_run* is ``True`` operations are recorded but not executed.
    """

    def __init__(self, driver: Any | None = None, dry_run: bool = False) -> None:
        # Neo4j driver; when None, every operation is record-only.
        self._driver = driver
        # Record-only mode: Cypher is never sent to the database when True.
        self._dry_run = dry_run
        # Ordered audit trail of every operation requested via this context.
        self.operations: list[Operation] = []
        # Active data-ingest batch id; None until begin_batch() is called.
        self._batch_id: str | None = None

    # ------------------------------------------------------------------
    # Constraints
    # ------------------------------------------------------------------

    def add_unique_constraint(self, label: str, property_name: str) -> None:
        """CREATE CONSTRAINT … REQUIRE (n.prop) IS UNIQUE."""
        # Deterministic name so the matching drop_* call can find it later.
        constraint_name = f"crochet_uniq_{label}_{property_name}"
        # NOTE(review): label/property are interpolated directly into the
        # Cypher text (identifiers cannot be parameterized in Cypher) —
        # safe only if they come from trusted model code, not user input.
        cypher = (
            f"CREATE CONSTRAINT {constraint_name} IF NOT EXISTS "
            f"FOR (n:{label}) REQUIRE n.{property_name} IS UNIQUE"
        )
        self._record_and_run("add_unique_constraint", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def drop_unique_constraint(self, label: str, property_name: str) -> None:
        """Drop the uniqueness constraint created by add_unique_constraint()."""
        constraint_name = f"crochet_uniq_{label}_{property_name}"
        cypher = f"DROP CONSTRAINT {constraint_name} IF EXISTS"
        self._record_and_run("drop_unique_constraint", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def add_node_property_existence_constraint(
        self, label: str, property_name: str
    ) -> None:
        """CREATE CONSTRAINT … REQUIRE (n.prop) IS NOT NULL."""
        constraint_name = f"crochet_exists_{label}_{property_name}"
        cypher = (
            f"CREATE CONSTRAINT {constraint_name} IF NOT EXISTS "
            f"FOR (n:{label}) REQUIRE n.{property_name} IS NOT NULL"
        )
        self._record_and_run("add_existence_constraint", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def drop_node_property_existence_constraint(
        self, label: str, property_name: str
    ) -> None:
        """Drop the existence constraint created by the matching add_* call."""
        constraint_name = f"crochet_exists_{label}_{property_name}"
        cypher = f"DROP CONSTRAINT {constraint_name} IF EXISTS"
        self._record_and_run("drop_existence_constraint", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    # ------------------------------------------------------------------
    # Indexes
    # ------------------------------------------------------------------

    def add_index(self, label: str, property_name: str) -> None:
        """Create a single-property index on (label, property_name)."""
        index_name = f"crochet_idx_{label}_{property_name}"
        cypher = (
            f"CREATE INDEX {index_name} IF NOT EXISTS "
            f"FOR (n:{label}) ON (n.{property_name})"
        )
        self._record_and_run("add_index", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def drop_index(self, label: str, property_name: str) -> None:
        """Drop the index created by add_index()."""
        index_name = f"crochet_idx_{label}_{property_name}"
        cypher = f"DROP INDEX {index_name} IF EXISTS"
        self._record_and_run("drop_index", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    # ------------------------------------------------------------------
    # Labels / Relationship types
    # ------------------------------------------------------------------

    def rename_label(self, old_label: str, new_label: str) -> None:
        """Move every node from *old_label* to *new_label* in one query."""
        cypher = (
            f"MATCH (n:{old_label}) "
            f"SET n:{new_label} REMOVE n:{old_label}"
        )
        self._record_and_run("rename_label", {
            "old_label": old_label, "new_label": new_label, "cypher": cypher,
        })

    def rename_relationship_type(self, old_type: str, new_type: str) -> None:
        """Recreate every *old_type* relationship under *new_type*.

        Neo4j cannot rename a relationship type in place, so each one is
        copied (including its properties) and the original deleted.
        """
        cypher = (
            f"MATCH (a)-[r:{old_type}]->(b) "
            f"CREATE (a)-[r2:{new_type}]->(b) "
            f"SET r2 = properties(r) "
            f"DELETE r"
        )
        self._record_and_run("rename_relationship_type", {
            "old_type": old_type, "new_type": new_type, "cypher": cypher,
        })

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    def add_node_property(
        self, label: str, property_name: str, default: Any = None
    ) -> None:
        """Declare a new node property, optionally backfilling a default.

        NOTE(review): when *default* is given, the SET runs on ALL nodes
        of the label, overwriting any pre-existing values — confirm this
        is intended before using on populated properties.
        """
        if default is not None:
            cypher = f"MATCH (n:{label}) SET n.{property_name} = $default"
            params = {"default": default}
        else:
            # Without a default there is nothing to execute; the property
            # addition is recorded purely for the audit trail.
            cypher = None
            params = None
        self._record_and_run("add_node_property", {
            "label": label, "property": property_name,
            "default": default, "cypher": cypher,
        }, params=params)

    def remove_node_property(self, label: str, property_name: str) -> None:
        """Remove a property from every node carrying *label*."""
        cypher = f"MATCH (n:{label}) REMOVE n.{property_name}"
        self._record_and_run("remove_node_property", {
            "label": label, "property": property_name, "cypher": cypher,
        })

    def rename_node_property(
        self, label: str, old_name: str, new_name: str
    ) -> None:
        """Copy *old_name* into *new_name* on every node, then drop the old key."""
        cypher = (
            f"MATCH (n:{label}) "
            f"SET n.{new_name} = n.{old_name} "
            f"REMOVE n.{old_name}"
        )
        self._record_and_run("rename_node_property", {
            "label": label, "old_name": old_name, "new_name": new_name,
            "cypher": cypher,
        })

    # ------------------------------------------------------------------
    # Raw Cypher (escape hatch)
    # ------------------------------------------------------------------

    def run_cypher(self, cypher: str, params: dict | None = None) -> Any:
        """Execute arbitrary Cypher — use sparingly."""
        # cypher_override ensures the exact text given here is executed,
        # independent of what lands in the recorded details.
        return self._record_and_run("run_cypher", {
            "cypher": cypher, "params": params,
        }, params=params, cypher_override=cypher)

    # ------------------------------------------------------------------
    # Data ingest helpers
    # ------------------------------------------------------------------

    def begin_batch(self, batch_id: str | None = None) -> str:
        """Start a data-ingest batch. Returns the batch ID."""
        # Caller-supplied id wins; otherwise a short random hex id is minted.
        self._batch_id = batch_id or uuid.uuid4().hex[:12]
        self._record_and_run("begin_batch", {"batch_id": self._batch_id})
        return self._batch_id

    @property
    def batch_id(self) -> str | None:
        # Read-only view of the current batch id (None when no batch is open).
        return self._batch_id

    def create_nodes(
        self, label: str, data: list[dict[str, Any]]
    ) -> int:
        """Create nodes from a list of property dictionaries.

        Each node is tagged with ``_crochet_batch`` for rollback.
        """
        if not data:
            return 0
        # Nodes created outside an explicit batch are tagged "untracked".
        batch = self._batch_id or "untracked"
        cypher = (
            f"UNWIND $rows AS row "
            f"CREATE (n:{label}) SET n = row, n._crochet_batch = $batch"
        )
        self._record_and_run("create_nodes", {
            "label": label, "count": len(data), "cypher": cypher,
        }, params={"rows": data, "batch": batch}, cypher_override=cypher)
        return len(data)

    def create_relationships(
        self,
        source_label: str,
        target_label: str,
        rel_type: str,
        data: list[dict[str, Any]],
        source_key: str = "source_id",
        target_key: str = "target_id",
        properties_key: str = "properties",
    ) -> int:
        """Create relationships from structured data rows.

        Each row must contain *source_key* and *target_key* values, and
        optionally a *properties_key* dict.

        NOTE(review): endpoints are matched on a literal ``id`` property
        of the source/target nodes — confirm ingested nodes carry ``id``.
        """
        if not data:
            return 0
        batch = self._batch_id or "untracked"
        cypher = (
            f"UNWIND $rows AS row "
            f"MATCH (a:{source_label} {{id: row.{source_key}}}) "
            f"MATCH (b:{target_label} {{id: row.{target_key}}}) "
            f"CREATE (a)-[r:{rel_type}]->(b) "
            f"SET r = row.{properties_key}, r._crochet_batch = $batch"
        )
        self._record_and_run("create_relationships", {
            "source_label": source_label, "target_label": target_label,
            "rel_type": rel_type, "count": len(data), "cypher": cypher,
        }, params={"rows": data, "batch": batch}, cypher_override=cypher)
        return len(data)

    def delete_nodes_by_batch(self, label: str, batch_id: str) -> None:
        """Delete all nodes of a label that belong to a batch."""
        cypher = (
            f"MATCH (n:{label} {{_crochet_batch: $batch}}) DETACH DELETE n"
        )
        self._record_and_run("delete_nodes_by_batch", {
            "label": label, "batch_id": batch_id, "cypher": cypher,
        }, params={"batch": batch_id}, cypher_override=cypher)

    def delete_relationships_by_batch(self, rel_type: str, batch_id: str) -> None:
        """Delete all relationships of a type that belong to a batch."""
        # Undirected pattern so direction does not matter for cleanup.
        cypher = (
            f"MATCH ()-[r:{rel_type} {{_crochet_batch: $batch}}]-() DELETE r"
        )
        self._record_and_run("delete_relationships_by_batch", {
            "rel_type": rel_type, "batch_id": batch_id, "cypher": cypher,
        }, params={"batch": batch_id}, cypher_override=cypher)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _record_and_run(
        self,
        op_type: str,
        details: dict[str, Any],
        params: dict | None = None,
        cypher_override: str | None = None,
    ) -> Any:
        """Append an Operation to the audit log and, unless dry-running
        (or no driver is configured), execute its Cypher.

        Returns the driver result summary, or None when nothing was run.
        """
        self.operations.append(Operation(op_type=op_type, details=details))
        if self._dry_run or self._driver is None:
            return None
        # Prefer an explicit override; fall back to the Cypher recorded in
        # the details (may be absent, e.g. add_node_property without default).
        cypher = cypher_override or details.get("cypher")
        if cypher:
            with self._driver.session() as session:
                # Parameters are expanded as keyword arguments to run().
                result = session.run(cypher, **(params or {}))
                return result.consume()
        return None
@@ -0,0 +1,105 @@
1
+ """Migration file scaffolding and template generation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+
9
# Template for generated migration modules. Placeholders are filled by
# render_migration(): {parent_id_repr} must be valid Python (None or a
# quoted string), while {parent_id} is the human-readable docstring form.
# {upgrade_body}/{downgrade_body} arrive pre-indented (4 spaces per line).
_MIGRATION_TEMPLATE = '''\
"""
{description}

Revision: {revision_id}
Parent: {parent_id}
Created: {created_at}
Schema: {schema_hash}
"""

from crochet.migrations.operations import MigrationContext

# -- Migration metadata --------------------------------------------------

revision_id = "{revision_id}"
parent_id = {parent_id_repr}
schema_hash = "{schema_hash}"
rollback_safe = {rollback_safe}


def upgrade(ctx: MigrationContext) -> None:
    """Apply this migration."""
{upgrade_body}


def downgrade(ctx: MigrationContext) -> None:
    """Revert this migration."""
{downgrade_body}
'''

# Comment header prepended to generated upgrade/downgrade bodies when a
# schema-diff summary is available (indented to function-body depth).
_DIFF_COMMENT_HEADER = "    # Detected schema changes:\n"
40
+
41
+
42
def slugify(text: str) -> str:
    """Return a filesystem-safe slug derived from *text*.

    Lowercases, collapses every non-alphanumeric run into a single
    underscore, trims edge underscores, and caps the result at 60 chars.
    """
    normalized = re.sub(r"[^a-z0-9]+", "_", text.strip().lower())
    return normalized.strip("_")[:60]


def generate_revision_id(seq: int, description: str) -> str:
    """Generate a revision id like ``0001_initial``.

    The sequence number is zero-padded to four digits and joined to the
    slugified description with an underscore.
    """
    return f"{seq:04d}_{slugify(description)}"
53
+
54
+
55
def render_migration(
    revision_id: str,
    parent_id: str | None,
    description: str,
    schema_hash: str,
    rollback_safe: bool = True,
    diff_summary: str = "",
) -> str:
    """Render a migration file from template.

    When *diff_summary* is non-empty, each of its lines becomes an
    indented comment in both the upgrade and downgrade bodies; otherwise
    the bodies are a bare ``pass``.
    """
    created_at = datetime.now(timezone.utc).isoformat()

    if diff_summary:
        commented = "".join(
            f"    # {line}\n" for line in diff_summary.splitlines()
        )
        body = _DIFF_COMMENT_HEADER + commented + "    pass"
    else:
        body = "    pass"

    return _MIGRATION_TEMPLATE.format(
        description=description,
        revision_id=revision_id,
        parent_id=parent_id or "None",
        parent_id_repr=repr(parent_id),
        created_at=created_at,
        schema_hash=schema_hash,
        rollback_safe=rollback_safe,
        # Upgrade and downgrade share the same generated body.
        upgrade_body=body,
        downgrade_body=body,
    )
87
+
88
+
89
def write_migration_file(
    migrations_dir: Path,
    revision_id: str,
    content: str,
) -> Path:
    """Write a migration file to disk and return the path.

    Creates *migrations_dir* (and parents) if needed and guarantees an
    ``__init__.py`` so the directory is importable as a package.
    """
    migrations_dir.mkdir(parents=True, exist_ok=True)

    # Package marker — created once, never overwritten.
    package_marker = migrations_dir / "__init__.py"
    if not package_marker.exists():
        package_marker.write_text("")

    target = migrations_dir / f"{revision_id}.py"
    target.write_text(content)
    return target
@@ -0,0 +1,6 @@
1
+ """Scaffolding helpers for neomodel node and relationship models."""
2
+
3
+ from crochet.scaffold.node import scaffold_node
4
+ from crochet.scaffold.relationship import scaffold_relationship
5
+
6
+ __all__ = ["scaffold_node", "scaffold_relationship"]
@@ -0,0 +1,48 @@
1
+ """Scaffold a new neomodel StructuredNode file with an immutable __kgid__."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from pathlib import Path
7
+
8
# Template for a generated StructuredNode module; {class_name} and {kgid}
# are filled in by scaffold_node().
_NODE_TEMPLATE = '''\
"""Node model: {class_name}"""

from neomodel import StructuredNode, StringProperty


class {class_name}(StructuredNode):
    """Graph node representing a {class_name}.

    The __kgid__ is an immutable identifier for this model's schema identity.
    It must never change, even if the class or file is renamed.
    """

    __kgid__ = "{kgid}"

    # -- Properties --
    name = StringProperty(required=True, unique_index=True)
'''


def scaffold_node(
    models_dir: Path,
    class_name: str,
    kgid: str | None = None,
    filename: str | None = None,
) -> Path:
    """Write a new node model file and return the path.

    Creates *models_dir* (with an ``__init__.py`` package marker) if
    needed. A missing *kgid* is generated from the lowercase class name
    plus a short random hex suffix; a missing *filename* defaults to
    ``<classname>.py``.
    """
    models_dir.mkdir(parents=True, exist_ok=True)

    # Guarantee the models directory is an importable package.
    package_marker = models_dir / "__init__.py"
    if not package_marker.exists():
        package_marker.write_text("")

    if kgid is None:
        kgid = f"{class_name.lower()}_{uuid.uuid4().hex[:8]}"
    target = models_dir / (filename or f"{class_name.lower()}.py")

    target.write_text(_NODE_TEMPLATE.format(class_name=class_name, kgid=kgid))
    return target
@@ -0,0 +1,52 @@
1
+ """Scaffold a new neomodel StructuredRel file with an immutable __kgid__."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from pathlib import Path
7
+
8
# Template for a generated StructuredRel module; {class_name}, {kgid} and
# {rel_type} are filled in by scaffold_relationship().
_REL_TEMPLATE = '''\
"""Relationship model: {class_name}"""

from neomodel import StructuredRel, StringProperty


class {class_name}(StructuredRel):
    """Graph relationship representing a {class_name}.

    The __kgid__ is an immutable identifier for this model's schema identity.
    It must never change, even if the class or file is renamed.
    """

    __kgid__ = "{kgid}"
    __type__ = "{rel_type}"

    # -- Properties --
'''


def scaffold_relationship(
    models_dir: Path,
    class_name: str,
    rel_type: str | None = None,
    kgid: str | None = None,
    filename: str | None = None,
) -> Path:
    """Write a new relationship model file and return the path.

    Creates *models_dir* (with an ``__init__.py`` package marker) if
    needed. Defaults: *rel_type* is the uppercased class name, *kgid* is
    the lowercase class name plus a random hex suffix, *filename* is
    ``<classname>.py``.
    """
    models_dir.mkdir(parents=True, exist_ok=True)

    # Guarantee the models directory is an importable package.
    package_marker = models_dir / "__init__.py"
    if not package_marker.exists():
        package_marker.write_text("")

    if kgid is None:
        kgid = f"{class_name.lower()}_{uuid.uuid4().hex[:8]}"
    if rel_type is None:
        rel_type = class_name.upper()
    target = models_dir / (filename or f"{class_name.lower()}.py")

    rendered = _REL_TEMPLATE.format(
        class_name=class_name, kgid=kgid, rel_type=rel_type
    )
    target.write_text(rendered)
    return target
crochet/verify.py ADDED
@@ -0,0 +1,141 @@
1
+ """Verification logic — ensure ledger, migrations, and graph agree."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ from crochet.config import CrochetConfig
9
+ from crochet.errors import VerificationError
10
+ from crochet.ledger.sqlite import Ledger
11
+ from crochet.migrations.engine import MigrationEngine
12
+
13
+
14
@dataclass
class VerificationReport:
    """Result of a verification run."""

    # Individual check outcomes, in execution order.
    checks: list[CheckResult] = field(default_factory=list)

    @property
    def passed(self) -> bool:
        """True when every recorded check passed (vacuously True if empty)."""
        return all(check.passed for check in self.checks)

    def summary(self) -> str:
        """Render one ``[PASS]``/``[FAIL]`` line per check, with any
        detail lines indented beneath it."""
        out: list[str] = []
        for check in self.checks:
            status = "PASS" if check.passed else "FAIL"
            out.append(f"[{status}] {check.name}")
            # Details are optional; indent each under its check line.
            out.extend(f"    {detail}" for detail in check.details or [])
        return "\n".join(out)
33
+
34
+
35
@dataclass
class CheckResult:
    """Outcome of a single verification check."""

    # Human-readable check name shown in the summary line.
    name: str
    # Whether the check succeeded.
    passed: bool
    # Optional explanatory lines (failure reasons, pending revision ids, ...).
    details: list[str] = field(default_factory=list)
40
+
41
+
42
def verify_project(
    config: CrochetConfig,
    ledger: Ledger,
    driver: Any | None = None,
) -> VerificationReport:
    """Run all verification checks and return a report.

    Checks: ledger chain integrity, migration files present, no pending
    migrations, schema-hash consistency, and (when *driver* is given)
    Neo4j connectivity.
    """
    report = VerificationReport()

    # 1. Ledger chain integrity comes first — everything else trusts it.
    report.checks.append(_check_ledger_chain(ledger))

    # 2–4. Engine-backed checks share one MigrationEngine instance.
    engine = MigrationEngine(config, ledger)
    for result in (
        _check_migration_files_match_ledger(engine, ledger),
        _check_no_pending(engine),
        _check_schema_hashes(engine, ledger),
    ):
        report.checks.append(result)

    # 5. Database reachability is only checked when a driver was supplied.
    if driver is not None:
        report.checks.append(_check_neo4j_connectivity(driver))

    return report
68
+
69
+
70
def _check_ledger_chain(ledger: Ledger) -> CheckResult:
    """Validate the ledger's hash chain; any reported issue fails the check."""
    problems = ledger.verify_chain()
    return CheckResult(
        name="Ledger chain integrity",
        passed=not problems,
        details=list(problems),
    )
79
+
80
+
81
def _check_migration_files_match_ledger(
    engine: MigrationEngine, ledger: Ledger
) -> CheckResult:
    """Every applied migration in the ledger must have a corresponding file."""
    on_disk = {mf.revision_id for mf in engine.discover_migrations()}
    missing = [
        f"Ledger references '{applied.revision_id}' but no file found."
        for applied in ledger.get_applied_migrations()
        if applied.revision_id not in on_disk
    ]
    return CheckResult(
        name="Migration files present",
        passed=not missing,
        details=missing,
    )
98
+
99
+
100
def _check_no_pending(engine: MigrationEngine) -> CheckResult:
    """Fail when migrations exist on disk that have not yet been applied."""
    outstanding = engine.pending_migrations()
    return CheckResult(
        name="No pending migrations",
        passed=not outstanding,
        details=[f"Pending: {mig.revision_id}" for mig in outstanding],
    )
109
+
110
+
111
def _check_schema_hashes(engine: MigrationEngine, ledger: Ledger) -> CheckResult:
    """Check that schema hashes in migration files match the ledger.

    A migration is compared only when it appears in the ledger and the
    file declares a schema hash. A missing/empty ledger hash is reported
    as a mismatch rather than crashing.
    """
    applied = {m.revision_id: m for m in ledger.get_applied_migrations()}
    issues: list[str] = []
    for mf in engine.discover_migrations():
        am = applied.get(mf.revision_id)
        if not (am and mf.schema_hash and am.schema_hash != mf.schema_hash):
            continue
        # Bug fix: am.schema_hash may be None (or empty) while still
        # differing from the file hash; slicing None raised TypeError
        # before. Fall back to an empty string for display.
        ledger_hash = am.schema_hash or ""
        issues.append(
            f"Hash mismatch for '{mf.revision_id}': "
            f"file={mf.schema_hash[:12]}… ledger={ledger_hash[:12]}…"
        )
    if issues:
        return CheckResult(
            name="Schema hash consistency",
            passed=False,
            details=issues,
        )
    return CheckResult(name="Schema hash consistency", passed=True)
129
+
130
+
131
def _check_neo4j_connectivity(driver: Any) -> CheckResult:
    """Probe the database with a trivial query to confirm reachability."""
    try:
        with driver.session() as session:
            session.run("RETURN 1")
    except Exception as exc:  # broad by design: any failure means unreachable
        return CheckResult(
            name="Neo4j connectivity",
            passed=False,
            details=[str(exc)],
        )
    return CheckResult(name="Neo4j connectivity", passed=True)