loki-mode 5.53.0 → 5.55.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,886 @@
1
+ """
2
+ Migration Engine for Loki Mode.
3
+
4
+ Core backend for the `loki migrate` enterprise code transformation feature.
5
+ Implements data models, MigrationPipeline, and phase gates for safe,
6
+ incremental codebase migrations with checkpoint/rollback support.
7
+ """
8
+
9
+ import dataclasses
10
+ import json
11
+ import logging
12
+ import os
13
+ import re
14
+ import subprocess
15
+ import tempfile
16
+ import threading
17
+ from dataclasses import asdict, dataclass, field
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+ from typing import Any, Optional
21
+
22
# Module-wide logger; the helpers and the pipeline below log through it.
logger = logging.getLogger("loki-migration")

# Root data directory: $LOKI_DATA_DIR when set, otherwise ~/.loki.
LOKI_DATA_DIR = os.environ.get(
    "LOKI_DATA_DIR",
    os.path.expanduser("~/.loki"),
)
# Each migration is stored in its own sub-directory of this path.
MIGRATIONS_DIR = os.path.join(LOKI_DATA_DIR, "migrations")

# Phases in the order they must be completed; the phase gates only allow a
# move from one entry to the entry directly after it.
PHASE_ORDER = ["understand", "guardrail", "migrate", "verify"]
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Data Models
33
+ # ---------------------------------------------------------------------------
34
+
35
+
36
@dataclasses.dataclass
class Feature:
    """One feature tracked across a migration.

    The guardrail -> migrate phase gate reads ``passes`` for every feature
    and reports the ``id`` of those that are still False.
    """

    id: str
    category: str
    description: str
    # Fresh empty list per instance.
    verification_steps: list[str] = dataclasses.field(default_factory=list)
    # The phase gate reports a False value as a characterization test that
    # is not passing.
    passes: bool = False
    # presumably the name/path of the characterization test - confirm with callers
    characterization_test: str = ""
    risk: str = "low"
    notes: str = ""
48
+
49
+
50
@dataclasses.dataclass
class MigrationStep:
    """A single unit of work inside a migration plan.

    The migrate -> verify phase gate requires ``status`` to be
    ``"completed"`` on every step of the plan.
    """

    id: str
    description: str
    # Kind of change, e.g. "refactor", "rewrite", "config", "test".
    type: str
    files: list[str] = dataclasses.field(default_factory=list)
    tests_required: list[str] = dataclasses.field(default_factory=list)
    estimated_tokens: int = 0
    risk: str = "low"
    # Shown as "[Rollback point]" in the plan summary when True.
    rollback_point: bool = False
    # IDs of other steps, listed under "Depends on" in the plan summary.
    depends_on: list[str] = dataclasses.field(default_factory=list)
    assigned_agent: str = ""
    # One of: pending | in_progress | completed | failed
    status: str = "pending"
65
+
66
+
67
@dataclasses.dataclass
class MigrationPlan:
    """A complete migration plan: strategy, constraints and ordered steps.

    Every field has a default, so ``MigrationPlan()`` yields an empty
    version-1 plan.
    """

    version: int = 1
    strategy: str = "incremental"
    constraints: list[str] = dataclasses.field(default_factory=list)
    steps: list[MigrationStep] = dataclasses.field(default_factory=list)
    rollback_strategy: str = "checkpoint"
    # Free-form mapping; the plan summary prints it as "key: value" lines.
    exit_criteria: dict[str, Any] = dataclasses.field(default_factory=dict)
77
+
78
+
79
@dataclasses.dataclass
class SeamInfo:
    """A seam (boundary or interface) detected in the codebase."""

    id: str
    # Kind of seam, e.g. "api", "module", "database", "config".
    type: str
    location: str
    description: str
    # presumably a score in [0, 1] - the visible code does not enforce a range
    confidence: float = 0.0
    suggested_interface: str = ""
89
+
90
+
91
@dataclasses.dataclass
class PhaseResult:
    """Outcome of running one migration phase."""

    phase: str
    # One of: pending | in_progress | completed | failed
    status: str
    artifacts: list[str] = dataclasses.field(default_factory=list)
    # Timestamps are plain strings; empty when not recorded.
    started_at: str = ""
    completed_at: str = ""
    error: str = ""
101
+
102
+
103
@dataclasses.dataclass
class CostEstimate:
    """Token and cost estimate for a migration."""

    total_tokens: int = 0
    estimated_cost_usd: float = 0.0
    # Integer per phase name; presumably a token count - confirm with the producer.
    by_phase: dict[str, int] = dataclasses.field(default_factory=dict)
110
+
111
+
112
@dataclasses.dataclass
class MigrationManifest:
    """Overall migration state, serialized to ``manifest.json``.

    Every field has a default, so a manifest can be rebuilt from a partial
    JSON object.
    """

    id: str = ""
    created_at: str = ""
    source_info: dict[str, Any] = dataclasses.field(default_factory=dict)
    target_info: dict[str, Any] = dataclasses.field(default_factory=dict)
    # Phase name -> per-phase record; the pipeline stores "status",
    # "started_at" and "completed_at" in each record.
    phases: dict[str, dict[str, Any]] = dataclasses.field(default_factory=dict)
    feature_list_path: str = ""
    migration_plan_path: str = ""
    # Git tag names, in the order checkpoints were recorded.
    checkpoints: list[str] = dataclasses.field(default_factory=list)
124
+
125
+
126
+ # ---------------------------------------------------------------------------
127
+ # Atomic file write helper
128
+ # ---------------------------------------------------------------------------
129
+
130
+
131
def _atomic_write(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` atomically via temp-file-then-replace.

    The text is encoded as UTF-8, written to a temporary file in the same
    directory as ``path``, fsynced, and then moved over ``path``. Parent
    directories are created when missing.

    Args:
        path: Destination file.
        content: Text to store.

    Raises:
        UnicodeEncodeError: If ``content`` cannot be encoded as UTF-8. This
            is raised before anything is created on disk.
        OSError: If creating, writing or replacing the file fails. The error
            is logged and the temporary file is removed before re-raising.
    """
    # Encode up front so a bad string cannot leave a stray temp file behind.
    payload = content.encode("utf-8")

    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = None
    try:
        fd, tmp_path = tempfile.mkstemp(
            dir=str(path.parent), suffix=".tmp"
        )
        try:
            # os.write() may write fewer bytes than requested; keep going
            # until the whole payload is on disk.
            remaining = memoryview(payload)
            while remaining:
                written = os.write(fd, remaining)
                remaining = remaining[written:]
            os.fsync(fd)
        finally:
            os.close(fd)
        # os.replace() overwrites an existing destination on every platform;
        # os.rename() fails on Windows when the target already exists.
        os.replace(tmp_path, str(path))
    except OSError as exc:
        logger.error("Failed to write %s: %s", path, exc)
        # Clean up temp file on failure
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
        raise
157
+
158
+
159
def _timestamp_iso() -> str:
    """Return the current time in UTC as an ISO 8601 string."""
    utc_now = datetime.now(timezone.utc)
    return utc_now.isoformat()
162
+
163
+
164
+ # ---------------------------------------------------------------------------
165
+ # MigrationPipeline
166
+ # ---------------------------------------------------------------------------
167
+
168
+
169
+ class MigrationPipeline:
170
+ """Manages the lifecycle of a codebase migration.
171
+
172
+ All state is persisted under ~/.loki/migrations/<migration_id>/.
173
+ Thread-safe for concurrent manifest reads and writes.
174
+ """
175
+
176
+ def __init__(
177
+ self,
178
+ codebase_path: str,
179
+ target: str,
180
+ options: Optional[dict[str, Any]] = None,
181
+ ) -> None:
182
+ self.codebase_path = os.path.abspath(codebase_path.rstrip(os.sep))
183
+ basename = os.path.basename(self.codebase_path)
184
+ if not basename:
185
+ raise ValueError(f"Cannot derive project name from codebase path: {codebase_path}")
186
+ self.target = target
187
+ self.options = options or {}
188
+ self.migration_id = self._generate_migration_id()
189
+ self.migration_dir = Path(MIGRATIONS_DIR) / self.migration_id
190
+ self._lock = threading.Lock()
191
+ self._logger = logging.getLogger("loki-migration")
192
+
193
+ # Ensure directory structure exists
194
+ self.migration_dir.mkdir(parents=True, exist_ok=True)
195
+ (self.migration_dir / "docs").mkdir(exist_ok=True)
196
+ (self.migration_dir / "checkpoints").mkdir(exist_ok=True)
197
+
198
+ def _generate_migration_id(self) -> str:
199
+ """Generate a unique migration ID like mig_20260223_143052_<dirname>."""
200
+ dirname = os.path.basename(self.codebase_path)
201
+ # Sanitize dirname to match validation regex
202
+ safe_dirname = re.sub(r'[^a-zA-Z0-9_-]', '_', dirname)
203
+ if not safe_dirname:
204
+ safe_dirname = 'unnamed'
205
+ now = datetime.now(timezone.utc)
206
+ date_str = now.strftime("%Y%m%d")
207
+ time_str = now.strftime("%H%M%S")
208
+ return f"mig_{date_str}_{time_str}_{safe_dirname}"
209
+
210
+ @classmethod
211
+ def load(cls, migration_id: str) -> 'MigrationPipeline':
212
+ """Load an existing migration by ID."""
213
+ if not re.match(r'^mig_\d{8}_\d{6}_[a-zA-Z0-9_-]+$', migration_id):
214
+ raise ValueError(
215
+ f"Invalid migration_id '{migration_id}': must match ^mig_YYYYMMDD_HHMMSS_<name>$"
216
+ )
217
+ migrations_dir = MIGRATIONS_DIR
218
+ migration_dir = os.path.join(migrations_dir, migration_id)
219
+ if not os.path.isdir(migration_dir):
220
+ raise FileNotFoundError(f"Migration not found: {migration_id}")
221
+ manifest_path = os.path.join(migration_dir, 'manifest.json')
222
+ if not os.path.isfile(manifest_path):
223
+ raise FileNotFoundError(f"Manifest not found for migration: {migration_id}")
224
+ with open(manifest_path) as f:
225
+ data = json.load(f)
226
+ # Reconstruct pipeline without re-creating directories
227
+ pipeline = cls.__new__(cls)
228
+ pipeline.codebase_path = data.get('source_info', {}).get('path', '')
229
+ pipeline.target = data.get('target_info', {}).get('target', '') or data.get('target_info', {}).get('language', '')
230
+ pipeline.options = {}
231
+ pipeline.migration_dir = Path(migration_dir)
232
+ pipeline.migration_id = migration_id
233
+ pipeline._lock = threading.Lock()
234
+ pipeline._logger = logging.getLogger('loki-migration')
235
+ return pipeline
236
+
237
+ # -- Manifest operations -------------------------------------------------
238
+
239
def create_manifest(self) -> MigrationManifest:
    """Build the initial manifest, save it, and return it.

    The "understand" phase starts out ``in_progress`` with a start
    timestamp; every other phase in ``PHASE_ORDER`` starts ``pending``.
    """
    created = _timestamp_iso()

    phases: dict[str, dict[str, Any]] = {}
    for name in PHASE_ORDER:
        is_first = name == "understand"
        phases[name] = {
            "status": "in_progress" if is_first else "pending",
            "started_at": _timestamp_iso() if is_first else "",
            "completed_at": "",
        }

    manifest = MigrationManifest(
        id=self.migration_id,
        created_at=created,
        source_info={
            "path": self.codebase_path,
            "type": self.options.get("source_type", "unknown"),
        },
        target_info={"target": self.target, "options": self.options},
        phases=phases,
        feature_list_path=str(self.migration_dir / "features.json"),
        migration_plan_path=str(self.migration_dir / "migration-plan.json"),
        checkpoints=[],
    )
    self.save_manifest(manifest)
    logger.info("Created migration manifest: %s", self.migration_id)
    return manifest
267
+
268
def _load_manifest_unlocked(self) -> MigrationManifest:
    """Read ``manifest.json`` and rebuild the manifest (caller must hold self._lock).

    Keys in the file that are not ``MigrationManifest`` fields are dropped.

    Raises:
        FileNotFoundError: If the manifest file is missing (logged as a
            warning).
        json.JSONDecodeError, TypeError: If the file cannot be parsed or
            turned into a manifest (logged as an error).
    """
    manifest_path = self.migration_dir / "manifest.json"
    try:
        raw = json.loads(manifest_path.read_text(encoding="utf-8"))
        known = {spec.name for spec in dataclasses.fields(MigrationManifest)}
        kwargs = {}
        for key, value in raw.items():
            if key in known:
                kwargs[key] = value
        return MigrationManifest(**kwargs)
    except FileNotFoundError:
        logger.warning("Manifest not found at %s", manifest_path)
        raise
    except (json.JSONDecodeError, TypeError) as exc:
        logger.error("Corrupt manifest at %s: %s", manifest_path, exc)
        raise
282
+
283
def _save_manifest_unlocked(self, manifest: MigrationManifest) -> None:
    """Serialize ``manifest`` to ``manifest.json`` atomically (caller must hold self._lock)."""
    destination = self.migration_dir / "manifest.json"
    as_mapping = asdict(manifest)
    _atomic_write(
        destination,
        json.dumps(as_mapping, indent=2, ensure_ascii=False),
    )
288
+
289
+ def load_manifest(self) -> MigrationManifest:
290
+ """Load manifest.json from disk."""
291
+ with self._lock:
292
+ return self._load_manifest_unlocked()
293
+
294
+ def save_manifest(self, manifest: MigrationManifest) -> None:
295
+ """Persist manifest to disk atomically."""
296
+ with self._lock:
297
+ self._save_manifest_unlocked(manifest)
298
+
299
+ # -- Phase gate logic ----------------------------------------------------
300
+
301
def get_phase_status(self, phase: str) -> str:
    """Return the status recorded for ``phase`` in the manifest.

    A phase with no manifest entry, or an entry without a status, is
    reported as ``"pending"``.

    Raises:
        ValueError: If ``phase`` is not in ``PHASE_ORDER``.
    """
    if phase in PHASE_ORDER:
        recorded = self.load_manifest().phases.get(phase, {})
        return recorded.get("status", "pending")
    raise ValueError(f"Unknown phase: {phase}")
308
+
309
def start_phase(self, phase: str) -> None:
    """Move ``phase`` from ``pending`` to ``in_progress`` and stamp its start time.

    The manifest is read, checked, updated and saved under ``self._lock``.

    Raises:
        ValueError: If ``phase`` is not in ``PHASE_ORDER``.
        KeyError: If the manifest has no entry (or no status) for ``phase``.
        RuntimeError: If the phase is not currently ``pending``.
    """
    if phase not in PHASE_ORDER:
        raise ValueError(f"Unknown phase: {phase}")
    with self._lock:
        manifest = self._load_manifest_unlocked()
        entry = manifest.phases[phase]
        current_status = entry["status"]
        if current_status != "pending":
            raise RuntimeError(
                f"Cannot start phase '{phase}': status is '{current_status}', expected 'pending'"
            )
        entry["status"] = "in_progress"
        entry["started_at"] = datetime.now(timezone.utc).isoformat()
        self._save_manifest_unlocked(manifest)
323
+
324
def _check_phase_gate_unlocked(self, from_phase: str, to_phase: str) -> tuple[bool, str]:
    """Decide whether ``from_phase`` may hand over to ``to_phase``.

    This variant takes no lock itself, so it can run inside a section that
    already holds ``self._lock`` (as ``advance_phase`` does).

    Returns:
        ``(allowed, reason)``. Only a move to the directly following phase
        in ``PHASE_ORDER`` is considered; each such move has its own check:

        * understand -> guardrail: ``docs/`` is non-empty and ``seams.json``
          exists.
        * guardrail -> migrate: ``features.json`` holds at least one feature
          and every feature has ``passes`` set.
        * migrate -> verify: every step in ``migration-plan.json`` has
          status ``completed``.

    Raises:
        json.JSONDecodeError, TypeError: If ``features.json`` or
            ``migration-plan.json`` exists but cannot be parsed into the
            expected objects; only a missing file is turned into a failed
            gate.
    """
    if not (from_phase in PHASE_ORDER and to_phase in PHASE_ORDER):
        return False, f"Unknown phase: {from_phase} or {to_phase}"

    # Only a single step forward is a legal transition.
    if PHASE_ORDER.index(to_phase) != PHASE_ORDER.index(from_phase) + 1:
        return False, f"Cannot jump from {from_phase} to {to_phase}"

    transition = (from_phase, to_phase)

    # Gate: understand -> guardrail
    if transition == ("understand", "guardrail"):
        docs_dir = self.migration_dir / "docs"
        if not (docs_dir.exists() and any(docs_dir.iterdir())):
            return False, "Phase gate failed: no documentation generated in docs/"
        if not (self.migration_dir / "seams.json").exists():
            return False, "Phase gate failed: seams.json does not exist"
        return True, "Gate passed: docs generated and seams.json exists"

    # Gate: guardrail -> migrate
    if transition == ("guardrail", "migrate"):
        features_file = self.migration_dir / "features.json"
        try:
            records = json.loads(features_file.read_text(encoding="utf-8"))
            features = [Feature(**record) for record in records]
        except FileNotFoundError:
            return False, "Phase gate failed: features.json not found"
        if not features:
            return False, "No features defined"
        failing = [feat for feat in features if not feat.passes]
        if failing:
            # Name at most five offenders to keep the message short.
            ids = ", ".join(feat.id for feat in failing[:5])
            return False, f"Phase gate failed: {len(failing)} characterization tests not passing ({ids})"
        return True, "Gate passed: all characterization tests pass"

    # Gate: migrate -> verify
    if transition == ("migrate", "verify"):
        plan_file = self.migration_dir / "migration-plan.json"
        try:
            payload = json.loads(plan_file.read_text(encoding="utf-8"))
            raw_steps = payload.pop("steps", [])
            plan = MigrationPlan(**payload)
            plan.steps = [MigrationStep(**raw) for raw in raw_steps]
        except FileNotFoundError:
            return False, "Phase gate failed: migration-plan.json not found"
        incomplete = [step for step in plan.steps if step.status != "completed"]
        if incomplete:
            ids = ", ".join(step.id for step in incomplete[:5])
            return False, f"Phase gate failed: {len(incomplete)} steps not completed ({ids})"
        return True, "Gate passed: all migration steps completed"

    return True, "Gate passed"
382
+
383
+ def check_phase_gate(self, from_phase: str, to_phase: str) -> tuple[bool, str]:
384
+ """Validate whether transition from from_phase to to_phase is allowed.
385
+
386
+ Returns:
387
+ Tuple of (allowed, reason). If allowed is False, reason explains why.
388
+ """
389
+ if from_phase not in PHASE_ORDER or to_phase not in PHASE_ORDER:
390
+ return False, f"Unknown phase: {from_phase} or {to_phase}"
391
+
392
+ from_idx = PHASE_ORDER.index(from_phase)
393
+ to_idx = PHASE_ORDER.index(to_phase)
394
+ if to_idx != from_idx + 1:
395
+ return False, f"Cannot jump from {from_phase} to {to_phase}"
396
+
397
+ # Gate: understand -> guardrail
398
+ if from_phase == "understand" and to_phase == "guardrail":
399
+ docs_dir = self.migration_dir / "docs"
400
+ has_docs = any(docs_dir.iterdir()) if docs_dir.exists() else False
401
+ if not has_docs:
402
+ return False, "Phase gate failed: no documentation generated in docs/"
403
+ seams_path = self.migration_dir / "seams.json"
404
+ if not seams_path.exists():
405
+ return False, "Phase gate failed: seams.json does not exist"
406
+ return True, "Gate passed: docs generated and seams.json exists"
407
+
408
+ # Gate: guardrail -> migrate
409
+ if from_phase == "guardrail" and to_phase == "migrate":
410
+ try:
411
+ features = self.load_features()
412
+ except FileNotFoundError:
413
+ return False, "Phase gate failed: features.json not found"
414
+ if not features:
415
+ return False, "No features defined"
416
+ failing = [f for f in features if not f.passes]
417
+ if failing:
418
+ ids = ", ".join(f.id for f in failing[:5])
419
+ return False, f"Phase gate failed: {len(failing)} characterization tests not passing ({ids})"
420
+ return True, "Gate passed: all characterization tests pass"
421
+
422
+ # Gate: migrate -> verify
423
+ if from_phase == "migrate" and to_phase == "verify":
424
+ try:
425
+ plan = self.load_plan()
426
+ except FileNotFoundError:
427
+ return False, "Phase gate failed: migration-plan.json not found"
428
+ incomplete = [s for s in plan.steps if s.status != "completed"]
429
+ if incomplete:
430
+ ids = ", ".join(s.id for s in incomplete[:5])
431
+ return False, f"Phase gate failed: {len(incomplete)} steps not completed ({ids})"
432
+ return True, "Gate passed: all migration steps completed"
433
+
434
+ return True, "Gate passed"
435
+
436
+ def advance_phase(self, phase: str) -> PhaseResult:
437
+ """Mark the current phase as complete and start the next one.
438
+
439
+ Args:
440
+ phase: The phase that has just been completed.
441
+
442
+ Returns:
443
+ PhaseResult for the completed phase.
444
+ """
445
+ if phase not in PHASE_ORDER:
446
+ raise ValueError(f"Unknown phase: {phase}")
447
+
448
+ phase_idx = PHASE_ORDER.index(phase)
449
+ next_phase = PHASE_ORDER[phase_idx + 1] if phase_idx + 1 < len(PHASE_ORDER) else None
450
+
451
+ with self._lock:
452
+ # Enforce phase gate if there is a next phase (inside lock for consistency)
453
+ if next_phase is not None:
454
+ allowed, reason = self._check_phase_gate_unlocked(phase, next_phase)
455
+ if not allowed:
456
+ raise RuntimeError(f"Phase gate failed: {reason}")
457
+
458
+ manifest = self._load_manifest_unlocked()
459
+ now = _timestamp_iso()
460
+
461
+ # Verify current phase is in_progress before advancing
462
+ if phase in manifest.phases:
463
+ current_status = manifest.phases[phase].get("status", "pending")
464
+ if current_status != "in_progress":
465
+ raise RuntimeError(
466
+ f"Cannot advance phase '{phase}': status is '{current_status}', expected 'in_progress'"
467
+ )
468
+
469
+ # Mark current phase completed
470
+ if phase in manifest.phases:
471
+ manifest.phases[phase]["status"] = "completed"
472
+ manifest.phases[phase]["completed_at"] = now
473
+
474
+ # Start next phase if there is one
475
+ if next_phase is not None:
476
+ manifest.phases[next_phase]["status"] = "in_progress"
477
+ manifest.phases[next_phase]["started_at"] = now
478
+
479
+ self._save_manifest_unlocked(manifest)
480
+
481
+ result = PhaseResult(
482
+ phase=phase,
483
+ status="completed",
484
+ completed_at=now,
485
+ )
486
+ logger.info("Phase '%s' completed for migration %s", phase, self.migration_id)
487
+ return result
488
+
489
+ # -- Features CRUD -------------------------------------------------------
490
+
491
def load_features(self) -> list[Feature]:
    """Read ``features.json`` and return its entries as ``Feature`` objects.

    The file is read while holding ``self._lock``.

    Raises:
        FileNotFoundError: If the file is missing (logged as a warning).
        json.JSONDecodeError, TypeError: If the file cannot be parsed into
            features (logged as an error).
    """
    source = self.migration_dir / "features.json"
    with self._lock:
        try:
            records = json.loads(source.read_text(encoding="utf-8"))
            features = [Feature(**record) for record in records]
        except FileNotFoundError:
            logger.warning("Features file not found: %s", source)
            raise
        except (json.JSONDecodeError, TypeError) as exc:
            logger.error("Corrupt features file: %s", exc)
            raise
        return features
504
+
505
def save_features(self, features: list[Feature]) -> None:
    """Write ``features`` to ``features.json`` atomically.

    Serialization happens before the lock is taken; only the file write
    runs under ``self._lock``.
    """
    destination = self.migration_dir / "features.json"
    records = [asdict(feature) for feature in features]
    serialized = json.dumps(records, indent=2, ensure_ascii=False)
    with self._lock:
        _atomic_write(destination, serialized)
    logger.info("Saved %d features to %s", len(features), destination)
514
+
515
+ # -- Plan CRUD -----------------------------------------------------------
516
+
517
def load_plan(self) -> MigrationPlan:
    """Read ``migration-plan.json`` and rebuild the plan with its steps.

    The file is read while holding ``self._lock``.

    Raises:
        FileNotFoundError: If the file is missing (logged as a warning).
        json.JSONDecodeError, TypeError: If the file cannot be parsed into a
            plan (logged as an error).
    """
    source = self.migration_dir / "migration-plan.json"
    with self._lock:
        try:
            payload = json.loads(source.read_text(encoding="utf-8"))
            # Steps are stored as plain dicts; take them out so the plan can
            # be built first, then attach them as MigrationStep objects.
            raw_steps = payload.pop("steps", [])
            plan = MigrationPlan(**payload)
            plan.steps = [MigrationStep(**raw) for raw in raw_steps]
        except FileNotFoundError:
            logger.warning("Plan file not found: %s", source)
            raise
        except (json.JSONDecodeError, TypeError) as exc:
            logger.error("Corrupt plan file: %s", exc)
            raise
        return plan
534
+
535
def save_plan(self, plan: MigrationPlan) -> None:
    """Write ``plan`` to ``migration-plan.json`` atomically.

    Serialization happens before the lock is taken; only the file write
    runs under ``self._lock``.
    """
    destination = self.migration_dir / "migration-plan.json"
    serialized = json.dumps(asdict(plan), indent=2, ensure_ascii=False)
    with self._lock:
        _atomic_write(destination, serialized)
    logger.info("Saved migration plan (v%d) to %s", plan.version, destination)
542
+
543
+ # -- Seams CRUD ----------------------------------------------------------
544
+
545
def load_seams(self) -> list[SeamInfo]:
    """Read ``seams.json`` and return its entries as ``SeamInfo`` objects.

    The file is read while holding ``self._lock``.

    Raises:
        FileNotFoundError: If the file is missing (logged as a warning).
        json.JSONDecodeError, TypeError: If the file cannot be parsed into
            seams (logged as an error).
    """
    source = self.migration_dir / "seams.json"
    with self._lock:
        try:
            records = json.loads(source.read_text(encoding="utf-8"))
            seams = [SeamInfo(**record) for record in records]
        except FileNotFoundError:
            logger.warning("Seams file not found: %s", source)
            raise
        except (json.JSONDecodeError, TypeError) as exc:
            logger.error("Corrupt seams file: %s", exc)
            raise
        return seams
558
+
559
def save_seams(self, seams: list[SeamInfo]) -> None:
    """Write ``seams`` to ``seams.json`` atomically.

    Serialization happens before the lock is taken; only the file write
    runs under ``self._lock``.
    """
    destination = self.migration_dir / "seams.json"
    records = [asdict(seam) for seam in seams]
    serialized = json.dumps(records, indent=2, ensure_ascii=False)
    with self._lock:
        _atomic_write(destination, serialized)
    logger.info("Saved %d seams to %s", len(seams), destination)
568
+
569
+ # -- Checkpoints ---------------------------------------------------------
570
+
571
+ @staticmethod
572
+ def _validate_step_id(step_id: str) -> None:
573
+ """Validate step_id contains only safe characters for git tag names."""
574
+ if not re.match(r'^[a-zA-Z0-9_-]+$', step_id):
575
+ raise ValueError(
576
+ f"Invalid step_id '{step_id}': must match ^[a-zA-Z0-9_-]+$"
577
+ )
578
+
579
+ def create_checkpoint(self, step_id: str) -> str:
580
+ """Create a git tag checkpoint for a migration step.
581
+
582
+ Creates tag: loki-migrate/<step_id>/pre
583
+
584
+ Returns:
585
+ The tag name created.
586
+ """
587
+ self._validate_step_id(step_id)
588
+ tag_name = f"loki-migrate/{step_id}/pre"
589
+ try:
590
+ subprocess.run(
591
+ ["git", "tag", tag_name],
592
+ cwd=self.codebase_path,
593
+ check=True,
594
+ capture_output=True,
595
+ text=True,
596
+ )
597
+ except subprocess.CalledProcessError as exc:
598
+ logger.error("Failed to create checkpoint tag %s: %s", tag_name, exc.stderr)
599
+ raise RuntimeError(f"Git tag creation failed: {exc.stderr}") from exc
600
+
601
+ # Record in manifest (hold lock for entire read-modify-write)
602
+ try:
603
+ with self._lock:
604
+ manifest = self._load_manifest_unlocked()
605
+ manifest.checkpoints.append(tag_name)
606
+ self._save_manifest_unlocked(manifest)
607
+ except Exception:
608
+ # Bug 9: rollback git tag if manifest save fails
609
+ logger.error("Manifest save failed after git tag creation; deleting tag %s", tag_name)
610
+ try:
611
+ subprocess.run(
612
+ ["git", "tag", "-d", tag_name],
613
+ cwd=self.codebase_path,
614
+ check=True,
615
+ capture_output=True,
616
+ text=True,
617
+ )
618
+ except subprocess.CalledProcessError:
619
+ logger.error("Failed to delete orphaned git tag %s", tag_name)
620
+ raise
621
+
622
+ # Write checkpoint metadata
623
+ meta_path = self.migration_dir / "checkpoints" / f"{step_id}.json"
624
+ meta = {
625
+ "step_id": step_id,
626
+ "tag": tag_name,
627
+ "created_at": _timestamp_iso(),
628
+ }
629
+ _atomic_write(meta_path, json.dumps(meta, indent=2))
630
+ logger.info("Created checkpoint: %s", tag_name)
631
+ return tag_name
632
+
633
+ def rollback_to_checkpoint(self, step_id: str) -> None:
634
+ """Reset the codebase to the checkpoint tag for a given step.
635
+
636
+ Runs: git reset --hard loki-migrate/<step_id>/pre
637
+ """
638
+ self._validate_step_id(step_id)
639
+ tag_name = f"loki-migrate/{step_id}/pre"
640
+ try:
641
+ subprocess.run(
642
+ ["git", "reset", "--hard", tag_name],
643
+ cwd=self.codebase_path,
644
+ check=True,
645
+ capture_output=True,
646
+ text=True,
647
+ )
648
+ except subprocess.CalledProcessError as exc:
649
+ logger.error("Failed to rollback to %s: %s", tag_name, exc.stderr)
650
+ raise RuntimeError(f"Git rollback failed: {exc.stderr}") from exc
651
+ logger.info("Rolled back to checkpoint: %s", tag_name)
652
+
653
+ # -- Progress and summary ------------------------------------------------
654
+
655
def get_progress(self) -> dict[str, Any]:
    """Summarize where the migration currently stands.

    Missing or unreadable ``features.json`` / ``migration-plan.json`` files
    are tolerated: the corresponding counters stay at zero.

    Returns:
        Dict with the migration id, overall status, current phase, the raw
        phase records, completed phases, source and target info, the id of
        the first in-progress step (or None), feature and step counters,
        metadata of the last checkpoint (or None) and the checkpoint count.
    """
    manifest = self.load_manifest()

    # Walk the phases in order: stop at the first one in progress, otherwise
    # remember the last completed one.
    current_phase = "pending"
    overall_status = "pending"
    completed_phases: list[str] = []
    for name in PHASE_ORDER:
        phase_status = manifest.phases.get(name, {}).get("status", "pending")
        if phase_status == "in_progress":
            current_phase, overall_status = name, "in_progress"
            break
        if phase_status == "completed":
            completed_phases.append(name)
            current_phase, overall_status = name, "completed"

    # Feature stats
    features_total = 0
    features_passing = 0
    try:
        features = self.load_features()
        features_total = len(features)
        features_passing = len([feat for feat in features if feat.passes])
    except (FileNotFoundError, json.JSONDecodeError, TypeError):
        pass

    # Step stats
    steps_total = 0
    steps_completed = 0
    current_step = None
    current_step_index = 0
    try:
        plan = self.load_plan()
        steps_total = len(plan.steps)
        steps_completed = len([step for step in plan.steps if step.status == "completed"])
        current_step = next(
            (step.id for step in plan.steps if step.status == "in_progress"),
            None,
        )
        # 1-based position of the step after the completed ones, capped at
        # the total; 0 when the plan has no steps.
        current_step_index = min(steps_completed + 1, steps_total) if steps_total else 0
    except (FileNotFoundError, json.JSONDecodeError, TypeError):
        pass

    # Last checkpoint metadata
    last_checkpoint_data: Optional[dict[str, Any]] = None
    if manifest.checkpoints:
        last_tag = manifest.checkpoints[-1]
        # Tag format: loki-migrate/<step_id>/pre
        segments = last_tag.split("/")
        if len(segments) > 1:
            cp_step_id = segments[1]
            meta_path = self.migration_dir / "checkpoints" / f"{cp_step_id}.json"
            try:
                meta = json.loads(meta_path.read_text(encoding="utf-8"))
                last_checkpoint_data = {
                    "tag": meta.get("tag", last_tag),
                    "step_id": meta.get("step_id", cp_step_id),
                    "timestamp": meta.get("created_at", ""),
                }
            except (FileNotFoundError, json.JSONDecodeError):
                # No usable metadata file: report the tag alone.
                last_checkpoint_data = {"tag": last_tag, "step_id": "", "timestamp": ""}

    return {
        "migration_id": self.migration_id,
        "status": overall_status,
        "current_phase": current_phase,
        "phases": manifest.phases,
        "completed_phases": completed_phases,
        "source": manifest.source_info,
        "target": manifest.target_info,
        "current_step": current_step,
        "features": {"passing": features_passing, "total": features_total},
        "steps": {"current": current_step_index, "completed": steps_completed, "total": steps_total},
        "last_checkpoint": last_checkpoint_data,
        "checkpoints_count": len(manifest.checkpoints),
    }
740
+
741
+ def generate_plan_summary(self) -> str:
742
+ """Generate a human-readable plan summary for --show-plan.
743
+
744
+ Returns:
745
+ Formatted string representation of the migration plan.
746
+ """
747
+ try:
748
+ plan = self.load_plan()
749
+ except FileNotFoundError:
750
+ return "No migration plan found. Run the 'understand' phase first."
751
+
752
+ lines: list[str] = []
753
+ lines.append(f"Migration Plan v{plan.version}")
754
+ lines.append(f"Strategy: {plan.strategy}")
755
+ lines.append(f"Rollback: {plan.rollback_strategy}")
756
+ lines.append("")
757
+
758
+ if plan.constraints:
759
+ lines.append("Constraints:")
760
+ for c in plan.constraints:
761
+ lines.append(f" - {c}")
762
+ lines.append("")
763
+
764
+ lines.append(f"Steps ({len(plan.steps)} total):")
765
+ lines.append("-" * 60)
766
+
767
+ for step in plan.steps:
768
+ status_marker = {
769
+ "pending": "[ ]",
770
+ "in_progress": "[>]",
771
+ "completed": "[x]",
772
+ "failed": "[!]",
773
+ }.get(step.status, "[ ]")
774
+
775
+ lines.append(f" {status_marker} {step.id}: {step.description}")
776
+ lines.append(f" Type: {step.type} | Risk: {step.risk} | Tokens: {step.estimated_tokens}")
777
+ if step.files:
778
+ lines.append(f" Files: {', '.join(step.files[:5])}")
779
+ if len(step.files) > 5:
780
+ lines.append(f" ... and {len(step.files) - 5} more")
781
+ if step.depends_on:
782
+ lines.append(f" Depends on: {', '.join(step.depends_on)}")
783
+ if step.rollback_point:
784
+ lines.append(" [Rollback point]")
785
+ lines.append("")
786
+
787
+ if plan.exit_criteria:
788
+ lines.append("Exit Criteria:")
789
+ for key, val in plan.exit_criteria.items():
790
+ lines.append(f" {key}: {val}")
791
+
792
+ return "\n".join(lines)
793
+
794
+
795
+ # ---------------------------------------------------------------------------
796
+ # Singleton accessor
797
+ # ---------------------------------------------------------------------------
798
+
799
# Guards every read and write of _pipeline_instance below.
_pipeline_lock = threading.Lock()
# The shared pipeline; None until get_migration_pipeline() first creates it.
_pipeline_instance: Optional[MigrationPipeline] = None
801
+
802
+
803
def get_migration_pipeline(
    codebase_path: Optional[str] = None,
    target: Optional[str] = None,
    options: Optional[dict[str, Any]] = None,
) -> MigrationPipeline:
    """Return the shared MigrationPipeline, creating it on first use.

    The first call must supply ``codebase_path`` and ``target``. Once an
    instance exists, it is returned as-is and the arguments are ignored.

    Raises:
        ValueError: If no instance exists yet and ``codebase_path`` or
            ``target`` is None.
    """
    global _pipeline_instance
    with _pipeline_lock:
        if _pipeline_instance is not None:
            return _pipeline_instance
        if codebase_path is None or target is None:
            raise ValueError(
                "codebase_path and target are required for first initialization"
            )
        _pipeline_instance = MigrationPipeline(
            codebase_path=codebase_path,
            target=target,
            options=options,
        )
        return _pipeline_instance
826
+
827
+
828
def reset_migration_pipeline() -> None:
    """Drop the shared MigrationPipeline instance.

    The next call to ``get_migration_pipeline`` then has to create a new
    one. Handy in tests and when a new migration session begins.
    """
    global _pipeline_instance
    with _pipeline_lock:
        _pipeline_instance = None
836
+
837
+
838
+ # ---------------------------------------------------------------------------
839
+ # Utility: list all migrations
840
+ # ---------------------------------------------------------------------------
841
+
842
+
843
def list_migrations() -> list[dict[str, Any]]:
    """List all migrations stored under ``MIGRATIONS_DIR``.

    Sub-directories are visited in sorted order. A directory without a
    ``manifest.json`` is ignored. A manifest that cannot be read, is not
    valid UTF-8 or JSON, or whose root / ``phases`` / ``source_info`` /
    ``target_info`` is not a JSON object is logged and skipped, so one bad
    entry never aborts the whole listing.

    Returns:
        List of dicts with id, created_at, source (full source info),
        source_path, target, and status. Status is ``in_progress`` if any
        phase is in progress, else ``completed`` if any phase is completed,
        else ``pending``.
    """
    migrations_path = Path(MIGRATIONS_DIR)
    # is_dir() rather than exists(): a stray file at this path has nothing
    # to list and would make iterdir() raise.
    if not migrations_path.is_dir():
        return []

    results: list[dict[str, Any]] = []
    for entry in sorted(migrations_path.iterdir()):
        if not entry.is_dir():
            continue
        manifest_file = entry / "manifest.json"
        if not manifest_file.exists():
            continue
        try:
            data = json.loads(manifest_file.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                raise ValueError("manifest root is not a JSON object")

            phases = data.get("phases", {})
            source_info = data.get("source_info", {})
            target_info = data.get("target_info", {})
            for label, section in (
                ("phases", phases),
                ("source_info", source_info),
                ("target_info", target_info),
            ):
                if not isinstance(section, dict):
                    raise ValueError(f"'{label}' is not a JSON object")

            # Determine overall status from phases (clean string, no parenthesized phase)
            status = "pending"
            for phase in PHASE_ORDER:
                phase_entry = phases.get(phase, {})
                if not isinstance(phase_entry, dict):
                    raise ValueError(f"phase '{phase}' is not a JSON object")
                phase_status = phase_entry.get("status", "pending")
                if phase_status == "in_progress":
                    status = "in_progress"
                    break
                if phase_status == "completed":
                    status = "completed"

            results.append({
                "id": data.get("id", entry.name),
                "created_at": data.get("created_at", ""),
                "source": source_info,
                "source_path": source_info.get("path", ""),
                # Same fallback as MigrationPipeline.load(): use "language"
                # when "target" is missing or empty.
                "target": target_info.get("target", "") or target_info.get("language", ""),
                "status": status,
            })
        except (ValueError, OSError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError
            # as well as the structural checks above.
            logger.warning("Skipping corrupt migration at %s: %s", entry, exc)
            continue

    return results