crochet-migration 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crochet/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """Crochet — Versioned schema & data migrations for neomodel Neo4j graphs."""
2
+
3
+ __version__ = "0.1.0"
crochet/cli.py ADDED
@@ -0,0 +1,327 @@
1
+ """Crochet CLI — command-line interface for managing neomodel migrations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import click
9
+
10
+ from crochet import __version__
11
+ from crochet.config import CrochetConfig, load_config, find_project_root
12
+ from crochet.errors import CrochetError, ProjectNotInitializedError
13
+ from crochet.ledger.sqlite import Ledger
14
+
15
+
16
+ def _get_config(ctx: click.Context) -> CrochetConfig:
17
+ """Load config, attaching it to the Click context."""
18
+ if "config" not in ctx.obj:
19
+ ctx.obj["config"] = load_config()
20
+ return ctx.obj["config"]
21
+
22
+
23
+ def _get_ledger(ctx: click.Context) -> Ledger:
24
+ """Open the ledger, attaching it to the Click context for cleanup."""
25
+ if "ledger" not in ctx.obj:
26
+ config = _get_config(ctx)
27
+ ctx.obj["ledger"] = Ledger(config.ledger_file)
28
+ return ctx.obj["ledger"]
29
+
30
+
31
+ # ======================================================================
32
+ # Root group
33
+ # ======================================================================
34
+
35
+
36
@click.group()
@click.version_option(__version__, prog_name="crochet")
@click.pass_context
def main(ctx: click.Context) -> None:
    """Crochet — versioned schema & data migrations for neomodel graphs."""
    # The config/ledger helpers cache their objects in ctx.obj using dict
    # operations, so make sure ctx.obj is a dict before any subcommand runs.
    ctx.ensure_object(dict)
42
+
43
+
44
+ # ======================================================================
45
+ # new-project
46
+ # ======================================================================
47
+
48
+
49
@main.command("new-project")
@click.option("--name", default="my-graph", help="Project name.")
@click.option(
    "--path",
    type=click.Path(),
    default=".",
    help="Directory to initialize (default: current directory).",
)
@click.pass_context
def new_project(ctx: click.Context, name: str, path: str) -> None:
    """Initialize a new Crochet project."""
    root = Path(path).resolve()

    # Write crochet.toml first; the directories below are derived from it.
    config = CrochetConfig(project_name=name, project_root=root)
    config.save()

    # Models and migrations each get a directory with an __init__.py inside.
    for package_dir in (config.models_dir, config.migrations_dir):
        package_dir.mkdir(parents=True, exist_ok=True)
        (package_dir / "__init__.py").touch()

    # Enter and leave the ledger context once to initialize the ledger
    # (presumably this creates the ledger file — confirm in Ledger).
    with Ledger(config.ledger_file):
        pass

    report = (
        f"Initialized crochet project '{name}' at {root}",
        f" config: {root / 'crochet.toml'}",
        f" models: {config.models_dir}",
        f" migrations: {config.migrations_dir}",
        f" ledger: {config.ledger_file}",
    )
    for line in report:
        click.echo(line)
80
+
81
+
82
+ # ======================================================================
83
+ # create-node
84
+ # ======================================================================
85
+
86
+
87
@main.command("create-node")
@click.argument("class_name")
@click.option("--kgid", default=None, help="Explicit __kgid__ (auto-generated if omitted).")
@click.pass_context
def create_node(ctx: click.Context, class_name: str, kgid: str | None) -> None:
    """Scaffold a new StructuredNode model file."""
    # Imported inside the command, as the original does.
    from crochet.scaffold.node import scaffold_node

    models_dir = _get_config(ctx).models_dir
    created = scaffold_node(models_dir, class_name, kgid=kgid)
    click.echo(f"Created node model: {created}")
98
+
99
+
100
+ # ======================================================================
101
+ # create-relationship
102
+ # ======================================================================
103
+
104
+
105
@main.command("create-relationship")
@click.argument("class_name")
@click.option("--rel-type", default=None, help="Neo4j relationship type (default: CLASS_NAME).")
@click.option("--kgid", default=None, help="Explicit __kgid__ (auto-generated if omitted).")
@click.pass_context
def create_relationship(
    ctx: click.Context, class_name: str, rel_type: str | None, kgid: str | None
) -> None:
    """Scaffold a new StructuredRel model file."""
    # Imported inside the command, as the original does.
    from crochet.scaffold.relationship import scaffold_relationship

    models_dir = _get_config(ctx).models_dir
    created = scaffold_relationship(models_dir, class_name, rel_type=rel_type, kgid=kgid)
    click.echo(f"Created relationship model: {created}")
121
+
122
+
123
+ # ======================================================================
124
+ # create-migration
125
+ # ======================================================================
126
+
127
+
128
@main.command("create-migration")
@click.argument("description")
@click.option("--no-snapshot", is_flag=True, help="Skip schema snapshot.")
@click.option("--unsafe", is_flag=True, help="Mark migration as rollback-unsafe.")
@click.pass_context
def create_migration(
    ctx: click.Context, description: str, no_snapshot: bool, unsafe: bool
) -> None:
    """Create a new migration file."""
    from crochet.ir.parser import parse_models_directory
    from crochet.migrations.engine import MigrationEngine

    config = _get_config(ctx)
    engine = MigrationEngine(config, _get_ledger(ctx))

    # The snapshot is optional: it is skipped on request, and a model parse
    # failure only produces a warning so the migration is still created.
    snapshot = None
    if not no_snapshot:
        try:
            snapshot = parse_models_directory(config.models_dir)
        except CrochetError as parse_error:
            click.echo(f"Warning: could not parse models: {parse_error}", err=True)

    migration_path = engine.create_migration(
        description=description,
        current_snapshot=snapshot,
        rollback_safe=not unsafe,
    )
    click.echo(f"Created migration: {migration_path}")
    if snapshot:
        # Only the first 16 characters of the hash are shown.
        short_hash = snapshot.schema_hash[:16]
        click.echo(f" schema hash: {short_hash}…")
160
+
161
+
162
+ # ======================================================================
163
+ # upgrade
164
+ # ======================================================================
165
+
166
+
167
@main.command()
@click.option("--target", default=None, help="Migrate up to this revision.")
@click.option("--dry-run", is_flag=True, help="Show what would be applied without executing.")
@click.pass_context
def upgrade(ctx: click.Context, target: str | None, dry_run: bool) -> None:
    """Apply pending migrations."""
    from crochet.migrations.engine import MigrationEngine

    config = _get_config(ctx)
    ledger = _get_ledger(ctx)
    engine = MigrationEngine(config, ledger)

    # No Neo4j driver is opened for a dry run.
    driver = None if dry_run else _try_connect_neo4j(config)

    try:
        applied = engine.upgrade(target=target, driver=driver, dry_run=dry_run)
    except CrochetError as exc:
        # Same handling as `downgrade`: report Crochet failures as a clean
        # CLI error instead of an unhandled traceback.
        raise click.ClickException(str(exc)) from exc
    finally:
        # Close the driver whether or not the upgrade succeeded.
        if driver:
            driver.close()

    if not applied:
        click.echo("Nothing to apply — already up to date.")
        return

    prefix = "[dry-run] " if dry_run else ""
    for rev in applied:
        click.echo(f"{prefix}Applied: {rev}")
    click.echo(f"{prefix}{len(applied)} migration(s) applied.")
194
+
195
+
196
+ # ======================================================================
197
+ # downgrade
198
+ # ======================================================================
199
+
200
+
201
@main.command()
@click.option("--target", default=None, help="Revert down to (but not including) this revision.")
@click.option("--dry-run", is_flag=True, help="Show what would be reverted without executing.")
@click.pass_context
def downgrade(ctx: click.Context, target: str | None, dry_run: bool) -> None:
    """Revert the most recent migration (or down to --target)."""
    from crochet.migrations.engine import MigrationEngine

    config = _get_config(ctx)
    engine = MigrationEngine(config, _get_ledger(ctx))

    # No Neo4j driver is opened for a dry run.
    driver = None if dry_run else _try_connect_neo4j(config)

    try:
        reverted = engine.downgrade(target=target, driver=driver, dry_run=dry_run)
    except CrochetError as failure:
        # Report Crochet failures as a regular CLI error message.
        raise click.ClickException(str(failure)) from failure
    finally:
        # Close the driver whether or not the downgrade succeeded.
        if driver:
            driver.close()

    if not reverted:
        click.echo("Nothing to revert.")
        return

    tag = "[dry-run] " if dry_run else ""
    for revision in reverted:
        click.echo(f"{tag}Reverted: {revision}")
    click.echo(f"{tag}{len(reverted)} migration(s) reverted.")
230
+
231
+
232
+ # ======================================================================
233
+ # status
234
+ # ======================================================================
235
+
236
+
237
@main.command()
@click.pass_context
def status(ctx: click.Context) -> None:
    """Show migration status."""
    from crochet.migrations.engine import MigrationEngine

    config = _get_config(ctx)
    ledger = _get_ledger(ctx)
    engine = MigrationEngine(config, ledger)

    all_migrations = engine.discover_migrations()
    applied = engine.applied_migrations()
    pending = engine.pending_migrations()

    click.echo(f"Project: {config.project_name}")
    click.echo(f"Total migrations: {len(all_migrations)}")
    click.echo(f"Applied: {len(applied)}")
    click.echo(f"Pending: {len(pending)}")

    head = ledger.get_head()
    if head:
        click.echo(f"Head: {head.revision_id}")
        click.echo(f" applied at: {head.applied_at}")
        # Print the hash line only when a hash was recorded; echoing an empty
        # string here would leave a stray blank line in the output.
        if head.schema_hash:
            click.echo(f" schema hash: {head.schema_hash[:16]}…")
    else:
        click.echo("Head: (none)")

    if pending:
        click.echo("\nPending migrations:")
        for m in pending:
            safe = "safe" if m.rollback_safe else "UNSAFE"
            click.echo(f" - {m.revision_id} [{safe}]")

    # Dataset batches: print the total count, then list only the last five.
    batches = ledger.get_batches()
    if batches:
        click.echo(f"\nDataset batches: {len(batches)}")
        for b in batches[-5:]:
            click.echo(f" - {b.batch_id} ({b.source_file or 'no file'})")
276
+
277
+
278
+ # ======================================================================
279
+ # verify
280
+ # ======================================================================
281
+
282
+
283
@main.command()
@click.option("--with-neo4j", is_flag=True, help="Also verify Neo4j connectivity.")
@click.pass_context
def verify(ctx: click.Context, with_neo4j: bool) -> None:
    """Run verification checks."""
    from crochet.verify import verify_project

    config = _get_config(ctx)
    ledger = _get_ledger(ctx)

    # A driver is only requested when --with-neo4j is given.
    driver = _try_connect_neo4j(config) if with_neo4j else None

    try:
        report = verify_project(config, ledger, driver=driver)
    finally:
        # Close the driver whether or not verification raised.
        if driver:
            driver.close()

    click.echo(report.summary())
    if report.passed:
        return
    # Failed checks exit with status 1.
    raise SystemExit(1)
306
+
307
+
308
+ # ======================================================================
309
+ # Helpers
310
+ # ======================================================================
311
+
312
+
313
+ def _try_connect_neo4j(config: CrochetConfig) -> object | None:
314
+ """Try to create a Neo4j driver. Returns None on failure."""
315
+ try:
316
+ from neo4j import GraphDatabase
317
+
318
+ return GraphDatabase.driver(
319
+ config.neo4j.uri,
320
+ auth=(config.neo4j.username, config.neo4j.password),
321
+ )
322
+ except Exception:
323
+ return None
324
+
325
+
326
+ if __name__ == "__main__":
327
+ main()
crochet/config.py ADDED
@@ -0,0 +1,116 @@
1
+ """Project configuration for Crochet."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+
9
+ import toml
10
+
11
+ from crochet.errors import ConfigError, ProjectNotInitializedError
12
+
13
+ CONFIG_FILENAME = "crochet.toml"
14
+ DEFAULT_MODELS_PATH = "models"
15
+ DEFAULT_MIGRATIONS_PATH = "migrations"
16
+ DEFAULT_LEDGER_PATH = ".crochet/ledger.db"
17
+
18
+
19
@dataclass
class Neo4jConfig:
    """Neo4j connection settings.

    After construction each field is replaced by its ``CROCHET_NEO4J_*``
    environment variable when that variable is set; otherwise the value
    passed in (or the default) is kept.
    """

    uri: str = "bolt://localhost:7687"
    username: str = "neo4j"
    password: str = ""

    def __post_init__(self) -> None:
        """Apply environment-variable overrides to every field."""
        overrides = (
            ("uri", "CROCHET_NEO4J_URI"),
            ("username", "CROCHET_NEO4J_USERNAME"),
            ("password", "CROCHET_NEO4J_PASSWORD"),
        )
        for attr, env_name in overrides:
            setattr(self, attr, os.environ.get(env_name, getattr(self, attr)))
29
+
30
+
31
@dataclass
class CrochetConfig:
    """Settings for one Crochet project, persisted as ``crochet.toml``.

    The ``*_path`` fields are joined onto ``project_root`` by the
    ``models_dir``, ``migrations_dir`` and ``ledger_file`` properties.
    """

    project_name: str = "my-graph"
    models_path: str = DEFAULT_MODELS_PATH
    migrations_path: str = DEFAULT_MIGRATIONS_PATH
    ledger_path: str = DEFAULT_LEDGER_PATH
    neo4j: Neo4jConfig = field(default_factory=Neo4jConfig)
    # Defaults to the working directory at construction time.
    project_root: Path = field(default_factory=lambda: Path.cwd())

    @property
    def models_dir(self) -> Path:
        """Return ``project_root / models_path``."""
        return self.project_root / self.models_path

    @property
    def migrations_dir(self) -> Path:
        """Return ``project_root / migrations_path``."""
        return self.project_root / self.migrations_path

    @property
    def ledger_file(self) -> Path:
        """Return ``project_root / ledger_path``."""
        return self.project_root / self.ledger_path

    def to_dict(self) -> dict:
        """Return the nested dict that is written to the config file.

        The Neo4j password and the project root are not included.
        """
        return {
            "project": {
                "name": self.project_name,
                "models_path": self.models_path,
                "migrations_path": self.migrations_path,
            },
            "neo4j": {
                "uri": self.neo4j.uri,
                "username": self.neo4j.username,
            },
            "ledger": {
                "path": self.ledger_path,
            },
        }

    def save(self, path: Path | None = None) -> None:
        """Write the configuration as TOML.

        Args:
            path: Destination file. Defaults to ``crochet.toml`` inside
                ``project_root``. Missing parent directories are created.
        """
        target = path or (self.project_root / CONFIG_FILENAME)
        target.parent.mkdir(parents=True, exist_ok=True)
        # TOML documents must be UTF-8. Relying on the platform default
        # encoding can write non-ASCII values in the wrong encoding.
        with open(target, "w", encoding="utf-8") as f:
            toml.dump(self.to_dict(), f)
73
+
74
+
75
def find_project_root(start: Path | None = None) -> Path:
    """Return the nearest directory at or above *start* holding crochet.toml.

    *start* defaults to the current working directory. Raises
    ProjectNotInitializedError when no directory up to the filesystem root
    contains the config file.
    """
    origin = (start or Path.cwd()).resolve()
    # Check the starting directory first, then each ancestor up to the root.
    for candidate in (origin, *origin.parents):
        if (candidate / CONFIG_FILENAME).exists():
            return candidate
    raise ProjectNotInitializedError(str(start or Path.cwd()))
85
+
86
+
87
def load_config(project_root: Path | None = None) -> CrochetConfig:
    """Read crochet.toml and build a CrochetConfig from it.

    When *project_root* is omitted it is located with find_project_root().
    Raises ProjectNotInitializedError if the config file is missing and
    ConfigError if it cannot be parsed. Missing sections or keys fall back
    to the built-in defaults.
    """
    root = project_root or find_project_root()
    config_path = root / CONFIG_FILENAME
    if not config_path.exists():
        raise ProjectNotInitializedError(str(root))

    try:
        raw = toml.load(config_path)
    except Exception as parse_error:
        raise ConfigError(f"Failed to parse {config_path}: {parse_error}") from parse_error

    project_section = raw.get("project", {})
    neo4j_section = raw.get("neo4j", {})
    ledger_section = raw.get("ledger", {})

    connection = Neo4jConfig(
        uri=neo4j_section.get("uri", "bolt://localhost:7687"),
        username=neo4j_section.get("username", "neo4j"),
        password=neo4j_section.get("password", ""),
    )
    return CrochetConfig(
        project_name=project_section.get("name", "my-graph"),
        models_path=project_section.get("models_path", DEFAULT_MODELS_PATH),
        migrations_path=project_section.get("migrations_path", DEFAULT_MIGRATIONS_PATH),
        ledger_path=ledger_section.get("path", DEFAULT_LEDGER_PATH),
        neo4j=connection,
        project_root=root,
    )
crochet/errors.py ADDED
@@ -0,0 +1,75 @@
1
+ """Custom exceptions for the Crochet framework."""
2
+
3
+
4
class CrochetError(Exception):
    """Root of the Crochet exception hierarchy."""
6
+
7
+
8
class ProjectNotInitializedError(CrochetError):
    """Signals that no crochet project exists at the given location."""

    def __init__(self, path: str = "."):
        message = f"No crochet project found at '{path}'. Run 'crochet new-project' first."
        super().__init__(message)
15
+
16
+
17
class ConfigError(CrochetError):
    """Signals a problem with the configuration file."""
19
+
20
+
21
class SchemaError(CrochetError):
    """Signals a schema parsing or validation problem."""
23
+
24
+
25
class MissingKGIDError(SchemaError):
    """Signals a neomodel class that does not declare a __kgid__."""

    def __init__(self, class_name: str):
        message = (
            f"Class '{class_name}' is missing a __kgid__ attribute. "
            "Every node and relationship model must declare an immutable __kgid__."
        )
        super().__init__(message)
33
+
34
+
35
class DuplicateKGIDError(SchemaError):
    """Signals that the same __kgid__ is declared on two classes."""

    def __init__(self, kgid: str, class1: str, class2: str):
        message = f"Duplicate __kgid__ '{kgid}' found on classes '{class1}' and '{class2}'."
        super().__init__(message)
42
+
43
+
44
class MigrationError(CrochetError):
    """Signals a problem while executing a migration."""
46
+
47
+
48
class MigrationChainError(MigrationError):
    """Signals a broken or inconsistent migration chain."""
50
+
51
+
52
class RollbackUnsafeError(MigrationError):
    """Signals an attempted downgrade of a rollback-unsafe migration."""

    def __init__(self, revision_id: str):
        message = (
            f"Migration '{revision_id}' is marked as rollback-unsafe. "
            "Downgrade is not permitted."
        )
        super().__init__(message)
60
+
61
+
62
class LedgerError(CrochetError):
    """Signals a problem with the SQLite ledger."""
64
+
65
+
66
class LedgerIntegrityError(LedgerError):
    """Signals that the ledger is in an inconsistent state."""
68
+
69
+
70
class IngestError(CrochetError):
    """Signals a problem during data ingest."""
72
+
73
+
74
class VerificationError(CrochetError):
    """Signals that verification checks did not pass."""
@@ -0,0 +1,5 @@
1
+ """Data ingest and batch tracking."""
2
+
3
+ from crochet.ingest.batch import compute_file_checksum, IngestTracker
4
+
5
+ __all__ = ["compute_file_checksum", "IngestTracker"]
@@ -0,0 +1,61 @@
1
+ """Deterministic data-ingest tracking: checksums, provenance, batch IDs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import uuid
7
+ from pathlib import Path
8
+
9
+ from crochet.errors import IngestError
10
+ from crochet.ledger.sqlite import DatasetBatch, Ledger
11
+
12
+
13
def compute_file_checksum(path: Path, algorithm: str = "sha256") -> str:
    """Hash the file at *path* and return the hexadecimal digest.

    *algorithm* is any name accepted by ``hashlib.new``. The file is read in
    8192-byte chunks rather than all at once.
    """
    digest = hashlib.new(algorithm)
    with open(path, "rb") as stream:
        while True:
            block = stream.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
20
+
21
+
22
class IngestTracker:
    """Records data-load batches in the ledger and re-checks their files."""

    def __init__(self, ledger: Ledger, loader_version: str = "1.0") -> None:
        """Keep the ledger and the loader version stamped on every batch."""
        self._ledger = ledger
        self._loader_version = loader_version

    def register_batch(
        self,
        source_file: Path | None = None,
        migration_id: str | None = None,
        record_count: int | None = None,
        batch_id: str | None = None,
    ) -> DatasetBatch:
        """Record a batch in the ledger and return the ledger's result.

        When *batch_id* is omitted, a 12-character id is taken from a random
        UUID. When *source_file* is given it must exist; its checksum and
        path string are stored with the batch.

        Raises:
            IngestError: if *source_file* is given but does not exist.
        """
        resolved_id = batch_id or uuid.uuid4().hex[:12]

        if source_file is None:
            file_name = None
            file_checksum = None
        else:
            if not source_file.exists():
                raise IngestError(f"Source file not found: {source_file}")
            file_checksum = compute_file_checksum(source_file)
            file_name = str(source_file)

        return self._ledger.record_batch(
            batch_id=resolved_id,
            migration_id=migration_id,
            source_file=file_name,
            file_checksum=file_checksum,
            loader_version=self._loader_version,
            record_count=record_count,
        )

    def verify_file(self, batch: DatasetBatch) -> bool:
        """Return whether the batch's source file still matches its checksum.

        A batch without a recorded file or checksum counts as valid; a
        recorded file that no longer exists counts as invalid.
        """
        has_record = batch.source_file is not None and batch.file_checksum is not None
        if not has_record:
            return True
        source = Path(batch.source_file)
        if not source.exists():
            return False
        return compute_file_checksum(source) == batch.file_checksum
crochet/ir/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ """Intermediate Representation for neomodel schemas."""
2
+
3
+ from crochet.ir.schema import (
4
+ PropertyIR,
5
+ NodeIR,
6
+ RelationshipIR,
7
+ SchemaSnapshot,
8
+ )
9
+ from crochet.ir.parser import parse_models_directory, parse_module
10
+ from crochet.ir.diff import SchemaDiff, diff_snapshots
11
+ from crochet.ir.hash import hash_snapshot
12
+
13
+ __all__ = [
14
+ "PropertyIR",
15
+ "NodeIR",
16
+ "RelationshipIR",
17
+ "SchemaSnapshot",
18
+ "parse_models_directory",
19
+ "parse_module",
20
+ "SchemaDiff",
21
+ "diff_snapshots",
22
+ "hash_snapshot",
23
+ ]