crochet-migration 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crochet/__init__.py +3 -0
- crochet/cli.py +327 -0
- crochet/config.py +116 -0
- crochet/errors.py +75 -0
- crochet/ingest/__init__.py +5 -0
- crochet/ingest/batch.py +61 -0
- crochet/ir/__init__.py +23 -0
- crochet/ir/diff.py +199 -0
- crochet/ir/hash.py +36 -0
- crochet/ir/parser.py +251 -0
- crochet/ir/schema.py +196 -0
- crochet/ledger/__init__.py +5 -0
- crochet/ledger/sqlite.py +282 -0
- crochet/migrations/__init__.py +6 -0
- crochet/migrations/engine.py +279 -0
- crochet/migrations/operations.py +267 -0
- crochet/migrations/template.py +105 -0
- crochet/scaffold/__init__.py +6 -0
- crochet/scaffold/node.py +48 -0
- crochet/scaffold/relationship.py +52 -0
- crochet/verify.py +141 -0
- crochet_migration-0.1.0.dist-info/METADATA +278 -0
- crochet_migration-0.1.0.dist-info/RECORD +26 -0
- crochet_migration-0.1.0.dist-info/WHEEL +4 -0
- crochet_migration-0.1.0.dist-info/entry_points.txt +2 -0
- crochet_migration-0.1.0.dist-info/licenses/LICENSE +21 -0
crochet/__init__.py
ADDED
crochet/cli.py
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""Crochet CLI — command-line interface for managing neomodel migrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
|
|
10
|
+
from crochet import __version__
|
|
11
|
+
from crochet.config import CrochetConfig, load_config, find_project_root
|
|
12
|
+
from crochet.errors import CrochetError, ProjectNotInitializedError
|
|
13
|
+
from crochet.ledger.sqlite import Ledger
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _get_config(ctx: click.Context) -> CrochetConfig:
|
|
17
|
+
"""Load config, attaching it to the Click context."""
|
|
18
|
+
if "config" not in ctx.obj:
|
|
19
|
+
ctx.obj["config"] = load_config()
|
|
20
|
+
return ctx.obj["config"]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _get_ledger(ctx: click.Context) -> Ledger:
|
|
24
|
+
"""Open the ledger, attaching it to the Click context for cleanup."""
|
|
25
|
+
if "ledger" not in ctx.obj:
|
|
26
|
+
config = _get_config(ctx)
|
|
27
|
+
ctx.obj["ledger"] = Ledger(config.ledger_file)
|
|
28
|
+
return ctx.obj["ledger"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# ======================================================================
|
|
32
|
+
# Root group
|
|
33
|
+
# ======================================================================
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@click.group()
@click.version_option(__version__, prog_name="crochet")
@click.pass_context
def main(ctx: click.Context) -> None:
    """Crochet — versioned schema & data migrations for neomodel graphs."""
    # Make sure ctx.obj is a dict before any subcommand runs: _get_config and
    # _get_ledger store their cached objects in it.
    ctx.ensure_object(dict)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ======================================================================
|
|
45
|
+
# new-project
|
|
46
|
+
# ======================================================================
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@main.command("new-project")
@click.option("--name", default="my-graph", help="Project name.")
@click.option(
    "--path",
    type=click.Path(),
    default=".",
    help="Directory to initialize (default: current directory).",
)
@click.pass_context
def new_project(ctx: click.Context, name: str, path: str) -> None:
    """Initialize a new Crochet project."""
    project_dir = Path(path).resolve()

    # Write the config file first; save() creates the project directory
    # itself when it does not exist yet.
    cfg = CrochetConfig(project_name=name, project_root=project_dir)
    cfg.save()

    # Both directories become Python packages (each gets an __init__.py).
    for package_dir in (cfg.models_dir, cfg.migrations_dir):
        package_dir.mkdir(parents=True, exist_ok=True)
        (package_dir / "__init__.py").touch()

    # Open and immediately leave the ledger context.
    # NOTE(review): presumably this creates the ledger file — confirm
    # against the Ledger implementation.
    with Ledger(cfg.ledger_file):
        pass

    summary = (
        f"Initialized crochet project '{name}' at {project_dir}",
        f" config: {project_dir / 'crochet.toml'}",
        f" models: {cfg.models_dir}",
        f" migrations: {cfg.migrations_dir}",
        f" ledger: {cfg.ledger_file}",
    )
    for line in summary:
        click.echo(line)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# ======================================================================
|
|
83
|
+
# create-node
|
|
84
|
+
# ======================================================================
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@main.command("create-node")
@click.argument("class_name")
@click.option("--kgid", default=None, help="Explicit __kgid__ (auto-generated if omitted).")
@click.pass_context
def create_node(ctx: click.Context, class_name: str, kgid: str | None) -> None:
    """Scaffold a new StructuredNode model file."""
    # Imported inside the command so the scaffold module is only loaded when
    # this command actually runs.
    from crochet.scaffold.node import scaffold_node

    models_dir = _get_config(ctx).models_dir
    created = scaffold_node(models_dir, class_name, kgid=kgid)
    click.echo(f"Created node model: {created}")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ======================================================================
|
|
101
|
+
# create-relationship
|
|
102
|
+
# ======================================================================
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@main.command("create-relationship")
@click.argument("class_name")
@click.option("--rel-type", default=None, help="Neo4j relationship type (default: CLASS_NAME).")
@click.option("--kgid", default=None, help="Explicit __kgid__ (auto-generated if omitted).")
@click.pass_context
def create_relationship(
    ctx: click.Context, class_name: str, rel_type: str | None, kgid: str | None
) -> None:
    """Scaffold a new StructuredRel model file."""
    # Imported inside the command so the scaffold module is only loaded when
    # this command actually runs.
    from crochet.scaffold.relationship import scaffold_relationship

    models_dir = _get_config(ctx).models_dir
    created = scaffold_relationship(
        models_dir,
        class_name,
        rel_type=rel_type,
        kgid=kgid,
    )
    click.echo(f"Created relationship model: {created}")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# ======================================================================
|
|
124
|
+
# create-migration
|
|
125
|
+
# ======================================================================
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@main.command("create-migration")
@click.argument("description")
@click.option("--no-snapshot", is_flag=True, help="Skip schema snapshot.")
@click.option("--unsafe", is_flag=True, help="Mark migration as rollback-unsafe.")
@click.pass_context
def create_migration(
    ctx: click.Context, description: str, no_snapshot: bool, unsafe: bool
) -> None:
    """Create a new migration file."""
    from crochet.ir.parser import parse_models_directory
    from crochet.migrations.engine import MigrationEngine

    cfg = _get_config(ctx)
    engine = MigrationEngine(cfg, _get_ledger(ctx))

    # The schema stays None when snapshots are disabled or when the models
    # cannot be parsed; a parse failure is only a warning, not an error.
    schema = None
    if not no_snapshot:
        try:
            schema = parse_models_directory(cfg.models_dir)
        except CrochetError as exc:
            click.echo(f"Warning: could not parse models: {exc}", err=True)

    migration_file = engine.create_migration(
        description=description,
        current_snapshot=schema,
        rollback_safe=not unsafe,
    )

    click.echo(f"Created migration: {migration_file}")
    if schema:
        # Only the first 16 characters of the hash are shown.
        click.echo(f" schema hash: {schema.schema_hash[:16]}…")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# ======================================================================
|
|
163
|
+
# upgrade
|
|
164
|
+
# ======================================================================
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@main.command()
@click.option("--target", default=None, help="Migrate up to this revision.")
@click.option("--dry-run", is_flag=True, help="Show what would be applied without executing.")
@click.pass_context
def upgrade(ctx: click.Context, target: str | None, dry_run: bool) -> None:
    """Apply pending migrations."""
    # Imported inside the command so the engine is only loaded when a
    # migration command actually runs.
    from crochet.migrations.engine import MigrationEngine

    config = _get_config(ctx)
    ledger = _get_ledger(ctx)
    engine = MigrationEngine(config, ledger)

    # A dry run never gets a driver. Otherwise the driver may still be None
    # when _try_connect_neo4j could not create one.
    driver = _try_connect_neo4j(config) if not dry_run else None

    try:
        applied = engine.upgrade(target=target, driver=driver, dry_run=dry_run)
    except CrochetError as exc:
        # Same handling as `downgrade`: report crochet failures as a Click
        # error message instead of an unhandled traceback.
        raise click.ClickException(str(exc)) from exc
    finally:
        # Close the driver whether or not the upgrade succeeded.
        if driver:
            driver.close()

    if not applied:
        click.echo("Nothing to apply — already up to date.")
    else:
        prefix = "[dry-run] " if dry_run else ""
        for rev in applied:
            click.echo(f"{prefix}Applied: {rev}")
        click.echo(f"{prefix}{len(applied)} migration(s) applied.")
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# ======================================================================
|
|
197
|
+
# downgrade
|
|
198
|
+
# ======================================================================
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
@main.command()
@click.option("--target", default=None, help="Revert down to (but not including) this revision.")
@click.option("--dry-run", is_flag=True, help="Show what would be reverted without executing.")
@click.pass_context
def downgrade(ctx: click.Context, target: str | None, dry_run: bool) -> None:
    """Revert the most recent migration (or down to --target)."""
    from crochet.migrations.engine import MigrationEngine

    cfg = _get_config(ctx)
    engine = MigrationEngine(cfg, _get_ledger(ctx))

    # A dry run never gets a driver.
    neo4j_driver = None if dry_run else _try_connect_neo4j(cfg)

    try:
        reverted = engine.downgrade(
            target=target,
            driver=neo4j_driver,
            dry_run=dry_run,
        )
    except CrochetError as exc:
        # Report crochet failures as a Click error message rather than a
        # traceback.
        raise click.ClickException(str(exc)) from exc
    finally:
        if neo4j_driver:
            neo4j_driver.close()

    if not reverted:
        click.echo("Nothing to revert.")
        return

    tag = "[dry-run] " if dry_run else ""
    for revision in reverted:
        click.echo(f"{tag}Reverted: {revision}")
    click.echo(f"{tag}{len(reverted)} migration(s) reverted.")
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# ======================================================================
|
|
233
|
+
# status
|
|
234
|
+
# ======================================================================
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
@main.command()
@click.pass_context
def status(ctx: click.Context) -> None:
    """Show migration status."""
    from crochet.migrations.engine import MigrationEngine

    config = _get_config(ctx)
    ledger = _get_ledger(ctx)
    engine = MigrationEngine(config, ledger)

    all_migrations = engine.discover_migrations()
    applied = engine.applied_migrations()
    pending = engine.pending_migrations()

    click.echo(f"Project: {config.project_name}")
    click.echo(f"Total migrations: {len(all_migrations)}")
    click.echo(f"Applied: {len(applied)}")
    click.echo(f"Pending: {len(pending)}")

    head = ledger.get_head()
    if head:
        click.echo(f"Head: {head.revision_id}")
        click.echo(f" applied at: {head.applied_at}")
        # Print the hash line only when a hash is recorded, so a head
        # without one no longer produces a stray blank line.
        if head.schema_hash:
            click.echo(f" schema hash: {head.schema_hash[:16]}…")
    else:
        click.echo("Head: (none)")

    if pending:
        click.echo("\nPending migrations:")
        for m in pending:
            safe = "safe" if m.rollback_safe else "UNSAFE"
            click.echo(f" - {m.revision_id} [{safe}]")

    # Dataset batches: print the total, then list only the last five entries.
    batches = ledger.get_batches()
    if batches:
        click.echo(f"\nDataset batches: {len(batches)}")
        for b in batches[-5:]:
            click.echo(f" - {b.batch_id} ({b.source_file or 'no file'})")
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
# ======================================================================
|
|
279
|
+
# verify
|
|
280
|
+
# ======================================================================
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
@main.command()
@click.option("--with-neo4j", is_flag=True, help="Also verify Neo4j connectivity.")
@click.pass_context
def verify(ctx: click.Context, with_neo4j: bool) -> None:
    """Run verification checks."""
    from crochet.verify import verify_project

    cfg = _get_config(ctx)
    project_ledger = _get_ledger(ctx)

    # A driver is only requested when --with-neo4j is given.
    neo4j_driver = _try_connect_neo4j(cfg) if with_neo4j else None

    try:
        outcome = verify_project(cfg, project_ledger, driver=neo4j_driver)
    finally:
        if neo4j_driver:
            neo4j_driver.close()

    click.echo(outcome.summary())
    if outcome.passed:
        return
    # A failed report exits with status 1.
    raise SystemExit(1)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
# ======================================================================
|
|
309
|
+
# Helpers
|
|
310
|
+
# ======================================================================
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _try_connect_neo4j(config: CrochetConfig) -> object | None:
    """Try to create a Neo4j driver. Returns None on failure.

    This stays best-effort: no exception escapes. The reason for a failure
    is written to stderr as a warning, so the caller's user can tell that
    the command is continuing without a driver.

    Args:
        config: Project config whose ``neo4j`` section supplies the URI and
            the username/password pair.

    Returns:
        The object returned by ``GraphDatabase.driver(...)``, or ``None``
        when the ``neo4j`` package cannot be imported or the driver cannot
        be created.
    """
    try:
        # Imported here so the CLI still works without the neo4j package.
        from neo4j import GraphDatabase

        return GraphDatabase.driver(
            config.neo4j.uri,
            auth=(config.neo4j.username, config.neo4j.password),
        )
    except ImportError as exc:
        click.echo(
            f"Warning: neo4j driver is not available ({exc}); "
            "continuing without a Neo4j connection.",
            err=True,
        )
    except Exception as exc:
        # Deliberately broad: any failure means "no driver", but say why.
        click.echo(
            f"Warning: could not create Neo4j driver ({exc}); "
            "continuing without a Neo4j connection.",
            err=True,
        )
    return None
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
# Allow running this module directly as a script; `main` is the Click group
# defined above.
if __name__ == "__main__":
    main()
|
crochet/config.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Project configuration for Crochet."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import toml
|
|
10
|
+
|
|
11
|
+
from crochet.errors import ConfigError, ProjectNotInitializedError
|
|
12
|
+
|
|
13
|
+
# Name of the project config file, looked up inside the project root.
CONFIG_FILENAME = "crochet.toml"
# Defaults for the path settings, used when the config file omits them.
# CrochetConfig joins each of them onto the project root.
DEFAULT_MODELS_PATH = "models"
DEFAULT_MIGRATIONS_PATH = "migrations"
DEFAULT_LEDGER_PATH = ".crochet/ledger.db"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class Neo4jConfig:
    """Connection settings for Neo4j, overridable from the environment.

    After construction, each field is replaced by the value of its
    ``CROCHET_NEO4J_*`` environment variable when that variable is set
    (an empty value counts as set).
    """

    uri: str = "bolt://localhost:7687"
    username: str = "neo4j"
    password: str = ""

    def __post_init__(self) -> None:
        """Apply environment-variable overrides on top of the given values."""
        overrides = (
            ("uri", "CROCHET_NEO4J_URI"),
            ("username", "CROCHET_NEO4J_USERNAME"),
            ("password", "CROCHET_NEO4J_PASSWORD"),
        )
        for attr, env_var in overrides:
            if env_var in os.environ:
                setattr(self, attr, os.environ[env_var])
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class CrochetConfig:
    """Settings for one Crochet project, plus the paths derived from them.

    The ``*_path`` fields are stored as strings and joined onto
    ``project_root`` by the ``models_dir``, ``migrations_dir`` and
    ``ledger_file`` properties.
    """

    project_name: str = "my-graph"
    models_path: str = DEFAULT_MODELS_PATH
    migrations_path: str = DEFAULT_MIGRATIONS_PATH
    ledger_path: str = DEFAULT_LEDGER_PATH
    neo4j: Neo4jConfig = field(default_factory=Neo4jConfig)
    # Directory the path fields are joined onto; defaults to the working
    # directory at the time the instance is created.
    project_root: Path = field(default_factory=lambda: Path.cwd())

    @property
    def models_dir(self) -> Path:
        """Return ``project_root / models_path``."""
        return self.project_root / self.models_path

    @property
    def migrations_dir(self) -> Path:
        """Return ``project_root / migrations_path``."""
        return self.project_root / self.migrations_path

    @property
    def ledger_file(self) -> Path:
        """Return ``project_root / ledger_path``."""
        return self.project_root / self.ledger_path

    def to_dict(self) -> dict:
        """Return the settings as the nested dict written to the config file.

        The Neo4j password and ``project_root`` are not included.
        """
        return {
            "project": {
                "name": self.project_name,
                "models_path": self.models_path,
                "migrations_path": self.migrations_path,
            },
            "neo4j": {
                "uri": self.neo4j.uri,
                "username": self.neo4j.username,
            },
            "ledger": {
                "path": self.ledger_path,
            },
        }

    def save(self, path: Path | None = None) -> None:
        """Write the settings as TOML.

        Args:
            path: Destination file. Defaults to ``crochet.toml`` inside
                ``project_root``. Missing parent directories are created.
        """
        target = path or (self.project_root / CONFIG_FILENAME)
        target.parent.mkdir(parents=True, exist_ok=True)
        # TOML documents are UTF-8 by specification, so do not depend on the
        # locale's default encoding (e.g. for a non-ASCII project name).
        with open(target, "w", encoding="utf-8") as f:
            toml.dump(self.to_dict(), f)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def find_project_root(start: Path | None = None) -> Path:
    """Locate the nearest directory at or above *start* holding crochet.toml.

    Args:
        start: Directory to begin searching from; the current working
            directory is used when omitted.

    Returns:
        The resolved path of the first directory, searching upward, that
        contains the config file.

    Raises:
        ProjectNotInitializedError: If no directory up to the filesystem
            root contains the config file.
    """
    origin = (start or Path.cwd()).resolve()
    # ``parents`` lists every ancestor up to the filesystem root, nearest
    # first, so this visits the same directories as walking ``.parent``.
    for candidate in (origin, *origin.parents):
        if (candidate / CONFIG_FILENAME).exists():
            return candidate
    raise ProjectNotInitializedError(str(start or Path.cwd()))
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def load_config(project_root: Path | None = None) -> CrochetConfig:
    """Load and return the project configuration.

    Args:
        project_root: Directory containing ``crochet.toml``. When omitted,
            the root is located with ``find_project_root()``.

    Returns:
        A ``CrochetConfig`` built from the file, with defaults filled in for
        any missing setting.

    Raises:
        ProjectNotInitializedError: If the config file does not exist.
        ConfigError: If the file cannot be parsed, or if ``project``,
            ``neo4j`` or ``ledger`` is present but is not a table.
    """
    root = project_root or find_project_root()
    config_path = root / CONFIG_FILENAME
    if not config_path.exists():
        raise ProjectNotInitializedError(str(root))

    try:
        data = toml.load(config_path)
    except Exception as exc:
        raise ConfigError(f"Failed to parse {config_path}: {exc}") from exc

    # Each section must be a table. Without this check a scalar such as
    # `project = "x"` would fail later with an AttributeError on `.get`.
    sections = {}
    for section_name in ("project", "neo4j", "ledger"):
        table = data.get(section_name, {})
        if not isinstance(table, dict):
            raise ConfigError(
                f"Invalid {config_path}: '{section_name}' must be a table, "
                f"got {type(table).__name__}"
            )
        sections[section_name] = table

    proj = sections["project"]
    neo = sections["neo4j"]
    ledger = sections["ledger"]

    # Neo4jConfig applies its environment-variable overrides on top of
    # whatever values are passed in here.
    neo4j_config = Neo4jConfig(
        uri=neo.get("uri", "bolt://localhost:7687"),
        username=neo.get("username", "neo4j"),
        password=neo.get("password", ""),
    )

    return CrochetConfig(
        project_name=proj.get("name", "my-graph"),
        models_path=proj.get("models_path", DEFAULT_MODELS_PATH),
        migrations_path=proj.get("migrations_path", DEFAULT_MIGRATIONS_PATH),
        ledger_path=ledger.get("path", DEFAULT_LEDGER_PATH),
        neo4j=neo4j_config,
        project_root=root,
    )
|
crochet/errors.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""Custom exceptions for the Crochet framework."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class CrochetError(Exception):
    """Base exception for all Crochet errors.

    Every other exception class in this module derives from it, directly or
    indirectly, so catching ``CrochetError`` covers all of them.
    """
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class ProjectNotInitializedError(CrochetError):
    """Signal that no crochet project exists at the given location."""

    def __init__(self, path: str = "."):
        """Build the error message for *path*, the location that was searched."""
        message = (
            f"No crochet project found at '{path}'. Run 'crochet new-project' first."
        )
        super().__init__(message)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ConfigError(CrochetError):
    """Raised for configuration file issues, e.g. a file that fails to parse."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SchemaError(CrochetError):
    """Raised for schema parsing or validation issues.

    Base class of ``MissingKGIDError`` and ``DuplicateKGIDError``.
    """
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class MissingKGIDError(SchemaError):
    """Signal that a model class does not declare a ``__kgid__``."""

    def __init__(self, class_name: str):
        """Build the error message naming the offending *class_name*."""
        message = (
            f"Class '{class_name}' is missing a __kgid__ attribute. "
            "Every node and relationship model must declare an immutable __kgid__."
        )
        super().__init__(message)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DuplicateKGIDError(SchemaError):
    """Signal that the same ``__kgid__`` appears on two classes."""

    def __init__(self, kgid: str, class1: str, class2: str):
        """Build the error message naming the shared *kgid* and both classes."""
        message = (
            f"Duplicate __kgid__ '{kgid}' found on classes '{class1}' and '{class2}'."
        )
        super().__init__(message)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class MigrationError(CrochetError):
    """Raised for migration execution issues.

    Base class of ``MigrationChainError`` and ``RollbackUnsafeError``.
    """
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class MigrationChainError(MigrationError):
    """Raised when the migration chain is broken or inconsistent."""
    # No constructor of its own: the message is whatever the raiser passes.
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class RollbackUnsafeError(MigrationError):
    """Signal an attempt to downgrade a migration marked rollback-unsafe."""

    def __init__(self, revision_id: str):
        """Build the error message naming the migration *revision_id*."""
        message = (
            f"Migration '{revision_id}' is marked as rollback-unsafe. "
            "Downgrade is not permitted."
        )
        super().__init__(message)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class LedgerError(CrochetError):
    """Raised for SQLite ledger issues.

    Base class of ``LedgerIntegrityError``.
    """
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class LedgerIntegrityError(LedgerError):
    """Raised when the ledger state is inconsistent."""
    # No constructor of its own: the message is whatever the raiser passes.
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class IngestError(CrochetError):
    """Raised for data ingest issues, e.g. a source file that does not exist."""
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class VerificationError(CrochetError):
    """Raised when verification checks fail."""
    # No constructor of its own: the message is whatever the raiser passes.
|
crochet/ingest/batch.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Deterministic data-ingest tracking: checksums, provenance, batch IDs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import uuid
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from crochet.errors import IngestError
|
|
10
|
+
from crochet.ledger.sqlite import DatasetBatch, Ledger
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def compute_file_checksum(path: Path, algorithm: str = "sha256") -> str:
    """Hash the contents of *path* and return the hexadecimal digest.

    Args:
        path: File to read, opened in binary mode.
        algorithm: Any algorithm name accepted by ``hashlib.new``.

    Returns:
        The hex digest of the file's bytes.
    """
    digest = hashlib.new(algorithm)
    with open(path, "rb") as stream:
        # Read in 8 KiB pieces so the whole file is never held in memory.
        while True:
            block = stream.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class IngestTracker:
    """Record data-ingest batches in the ledger and re-check their files."""

    def __init__(self, ledger: Ledger, loader_version: str = "1.0") -> None:
        """Keep the ledger to write to and the loader version to stamp on batches."""
        self._loader_version = loader_version
        self._ledger = ledger

    def register_batch(
        self,
        source_file: Path | None = None,
        migration_id: str | None = None,
        record_count: int | None = None,
        batch_id: str | None = None,
    ) -> DatasetBatch:
        """Record one batch in the ledger and return the ledger's result.

        Args:
            source_file: File the batch was loaded from. When given, its
                checksum and path are recorded alongside the batch.
            migration_id: Passed through to the ledger unchanged.
            record_count: Passed through to the ledger unchanged.
            batch_id: Identifier to record; when omitted (or empty), the
                first 12 hex characters of a random UUID are used.

        Raises:
            IngestError: If *source_file* is given but does not exist.
        """
        resolved_id = batch_id or uuid.uuid4().hex[:12]

        recorded_checksum = None
        recorded_name = None
        if source_file is not None:
            if not source_file.exists():
                raise IngestError(f"Source file not found: {source_file}")
            recorded_checksum = compute_file_checksum(source_file)
            recorded_name = str(source_file)

        return self._ledger.record_batch(
            batch_id=resolved_id,
            migration_id=migration_id,
            source_file=recorded_name,
            file_checksum=recorded_checksum,
            loader_version=self._loader_version,
            record_count=record_count,
        )

    def verify_file(self, batch: DatasetBatch) -> bool:
        """Report whether *batch*'s source file still matches its checksum.

        Returns ``True`` when the batch has no recorded file or no recorded
        checksum, since there is nothing to compare. Returns ``False`` when
        the recorded file no longer exists.
        """
        if not (batch.source_file is not None and batch.file_checksum is not None):
            return True
        candidate = Path(batch.source_file)
        return (
            candidate.exists()
            and compute_file_checksum(candidate) == batch.file_checksum
        )
|
crochet/ir/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Intermediate Representation for neomodel schemas."""
|
|
2
|
+
|
|
3
|
+
from crochet.ir.schema import (
|
|
4
|
+
PropertyIR,
|
|
5
|
+
NodeIR,
|
|
6
|
+
RelationshipIR,
|
|
7
|
+
SchemaSnapshot,
|
|
8
|
+
)
|
|
9
|
+
from crochet.ir.parser import parse_models_directory, parse_module
|
|
10
|
+
from crochet.ir.diff import SchemaDiff, diff_snapshots
|
|
11
|
+
from crochet.ir.hash import hash_snapshot
|
|
12
|
+
|
|
13
|
+
# Names re-exported from the schema, parser, diff and hash submodules as the
# public interface of this package.
__all__ = [
    "PropertyIR",
    "NodeIR",
    "RelationshipIR",
    "SchemaSnapshot",
    "parse_models_directory",
    "parse_module",
    "SchemaDiff",
    "diff_snapshots",
    "hash_snapshot",
]
|