PyPI - any2heliosdb - Versions diffs - 0.9.1__py3-none-any.whl - Mend

any2heliosdb 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

any2heliosdb/__init__.py +16 -0
any2heliosdb/__main__.py +7 -0
any2heliosdb/assess/__init__.py +28 -0
any2heliosdb/assess/inventory.py +78 -0
any2heliosdb/assess/render.py +161 -0
any2heliosdb/assess/report.py +125 -0
any2heliosdb/cdc/__init__.py +0 -0
any2heliosdb/cdc/engine.py +143 -0
any2heliosdb/cdc/registry.py +76 -0
any2heliosdb/cdc/replicat.py +109 -0
any2heliosdb/cdc/sinks/__init__.py +0 -0
any2heliosdb/cdc/sources/__init__.py +0 -0
any2heliosdb/cdc/sources/mysql_binlog.py +193 -0
any2heliosdb/cdc/sources/oracle_scn.py +43 -0
any2heliosdb/cdc/trail.py +51 -0
any2heliosdb/chunking/__init__.py +0 -0
any2heliosdb/chunking/pk_range.py +65 -0
any2heliosdb/cli.py +489 -0
any2heliosdb/config/__init__.py +0 -0
any2heliosdb/config/model.py +85 -0
any2heliosdb/config/store.py +146 -0
any2heliosdb/config/wizard.py +119 -0
any2heliosdb/constants.py +148 -0
any2heliosdb/core/__init__.py +0 -0
any2heliosdb/core/catalog_model.py +367 -0
any2heliosdb/core/change_record.py +86 -0
any2heliosdb/core/identifiers.py +80 -0
any2heliosdb/core/loader.py +172 -0
any2heliosdb/core/manifest.py +304 -0
any2heliosdb/core/orchestrator.py +333 -0
any2heliosdb/emit/__init__.py +0 -0
any2heliosdb/emit/ddl.py +137 -0
any2heliosdb/emit/mysql_ddl.py +195 -0
any2heliosdb/emit/oracle_ddl.py +80 -0
any2heliosdb/errors.py +51 -0
any2heliosdb/geom/__init__.py +0 -0
any2heliosdb/mcp/__init__.py +45 -0
any2heliosdb/mcp/auth.py +179 -0
any2heliosdb/mcp/protocol.py +141 -0
any2heliosdb/mcp/server.py +222 -0
any2heliosdb/mcp/tools.py +554 -0
any2heliosdb/monitor/__init__.py +12 -0
any2heliosdb/monitor/live.py +240 -0
any2heliosdb/plsql/__init__.py +19 -0
any2heliosdb/plsql/cost.py +47 -0
any2heliosdb/plsql/gap.py +125 -0
any2heliosdb/plsql/rewrite.py +351 -0
any2heliosdb/sources/__init__.py +0 -0
any2heliosdb/sources/base.py +81 -0
any2heliosdb/sources/mssql/__init__.py +0 -0
any2heliosdb/sources/mssql/adapter.py +429 -0
any2heliosdb/sources/mysql/__init__.py +0 -0
any2heliosdb/sources/mysql/adapter.py +237 -0
any2heliosdb/sources/oracle/__init__.py +0 -0
any2heliosdb/sources/oracle/adapter.py +309 -0
any2heliosdb/sources/postgres/__init__.py +0 -0
any2heliosdb/sources/postgres/adapter.py +608 -0
any2heliosdb/target/__init__.py +0 -0
any2heliosdb/target/base.py +196 -0
any2heliosdb/target/capability.py +178 -0
any2heliosdb/target/copy_codec.py +88 -0
any2heliosdb/target/mysql_driver.py +239 -0
any2heliosdb/target/native_driver.py +205 -0
any2heliosdb/target/psycopg_driver.py +288 -0
any2heliosdb/typemap/__init__.py +0 -0
any2heliosdb/typemap/defaults.py +251 -0
any2heliosdb/typemap/registry.py +83 -0
any2heliosdb/validate/__init__.py +17 -0
any2heliosdb/validate/counts.py +56 -0
any2heliosdb/validate/data.py +244 -0
any2heliosdb/validate/model.py +60 -0
any2heliosdb/validate/structure.py +52 -0
any2heliosdb-0.9.1.dist-info/METADATA +319 -0
any2heliosdb-0.9.1.dist-info/RECORD +77 -0
any2heliosdb-0.9.1.dist-info/WHEEL +4 -0
any2heliosdb-0.9.1.dist-info/entry_points.txt +3 -0
any2heliosdb-0.9.1.dist-info/licenses/LICENSE +201 -0

any2heliosdb/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Any2HeliosDB — migrate Oracle / MySQL / PostgreSQL / SQL Server into HeliosDB or stock PostgreSQL.
+A modern, Python successor to Ora2Pg, retargeted at HeliosDB (Lite, Full, and
+— via the portable psycopg/PG-wire path — Nano). The guiding principle is to
+prefer fixing/extending the target database over carrying translation logic in
+the tool, so the fork stays thin. Every incompatibility the tool works around is
+also emitted as a structured target-gap report.
+Importing this package is side-effect free and does not pull in any database
+driver; heavy imports (psycopg, oracledb, …) are deferred to the modules that
+actually open connections, so the pure-logic layers stay unit-testable without
+the drivers installed.
+"""
+__version__ = "0.9.1"
+__all__ = ["__version__"]

any2heliosdb/__main__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""Enable ``python -m any2heliosdb``."""
+from __future__ import annotations
+from .cli import main
+# Run the CLI only when this module is executed (``python -m any2heliosdb``),
+# never as a side effect of importing it.
+if __name__ == "__main__":
+    main()

any2heliosdb/assess/__init__.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""Assessment module — the SHOW_* / SHOW_REPORT surface.
+Mirrors Ora2Pg's ``SHOW_VERSION`` / ``SHOW_SCHEMA`` / ``SHOW_TABLE`` /
+``SHOW_COLUMN`` inspection and ``SHOW_REPORT --estimate_cost`` migration-cost
+estimate, computed against the canonical IR
+(:mod:`any2heliosdb.core.catalog_model`) instead of a live catalog so the same
+report can be produced offline from an introspected schema.
+Public surface:
+* :func:`~any2heliosdb.assess.inventory.schema_inventory` — object/column counts.
+* :class:`~any2heliosdb.assess.report.AssessmentReport` + :func:`build_report`.
+* :mod:`~any2heliosdb.assess.render` — text / JSON / HTML renderers.
+"""
+from __future__ import annotations
+from .inventory import schema_inventory
+from .report import AssessmentReport, build_report
+from .render import render_html, render_json, render_text
+__all__ = [
+    "schema_inventory",
+    "AssessmentReport",
+    "build_report",
+    "render_text",
+    "render_json",
+    "render_html",
+]

any2heliosdb/assess/inventory.py ADDED Viewed

@@ -0,0 +1,78 @@
+"""Schema object inventory — the SHOW_SCHEMA / SHOW_TABLE / SHOW_COLUMN counts.
+Walks the canonical IR (:class:`~any2heliosdb.core.catalog_model.Schema`) and
+returns a plain, JSON-serializable ``dict`` of object counts plus a per-table
+column listing. Pure function, no I/O — the assessment report and the renderers
+consume this directly.
+"""
+from __future__ import annotations
+from typing import Dict, List
+from ..core.catalog_model import Schema
+def _column_entry(column) -> Dict[str, object]:
+    """Build the assessment view of a single IR column.
+    Reports the column name, its verbatim source type (falling back to the
+    rendered target SQL when no source type was recorded), the target SQL and
+    the nullability flag coerced to a plain ``bool``.
+    """
+    entry: Dict[str, object] = {"name": column.name}
+    entry["source_type"] = column.source_type or column.data_type.sql()
+    entry["target_sql"] = column.data_type.sql()
+    entry["nullable"] = bool(column.nullable)
+    return entry
+def schema_inventory(schema: Schema) -> Dict[str, object]:
+    """Summarize *schema* as object counts plus a column listing per table.
+    The result is a flat, JSON-friendly mapping shaped like::
+        {
+          "schema": "HR",
+          "counts": {"tables": 2, "columns": 5, "views": 0, "sequences": 1,
+                     "routines": 0, "triggers": 0, "indexes": 1,
+                     "foreign_keys": 1, "types": 0},
+          "tables": [
+             {"name": "EMPLOYEES", "schema": "HR", "column_count": 3,
+              "columns": [ {column entries...} ]},
+             ...
+          ],
+        }
+    Column, index and foreign-key totals are summed over the tables; every
+    other count is the length of the matching schema-level collection.
+    """
+    table_entries: List[Dict[str, object]] = []
+    tally = {"columns": 0, "indexes": 0, "foreign_keys": 0}
+    for tbl in schema.tables:
+        column_entries = [_column_entry(col) for col in tbl.columns]
+        tally["columns"] += len(column_entries)
+        tally["indexes"] += len(tbl.indexes)
+        tally["foreign_keys"] += len(tbl.foreign_keys)
+        table_entries.append(
+            dict(
+                name=tbl.name,
+                schema=tbl.schema,
+                column_count=len(column_entries),
+                columns=column_entries,
+            )
+        )
+    # Key order is kept stable: it is the order the renderers display.
+    counts: Dict[str, int] = dict(
+        tables=len(schema.tables),
+        columns=tally["columns"],
+        views=len(schema.views),
+        sequences=len(schema.sequences),
+        routines=len(schema.routines),
+        triggers=len(schema.triggers),
+        indexes=tally["indexes"],
+        foreign_keys=tally["foreign_keys"],
+        types=len(schema.types),
+    )
+    return dict(schema=schema.name, counts=counts, tables=table_entries)

any2heliosdb/assess/render.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""Renderers for an :class:`~any2heliosdb.assess.report.AssessmentReport`.
+Three surfaces, mirroring Ora2Pg's report outputs:
+* :func:`render_text` — a compact plain-text summary for the terminal.
+* :func:`render_json` — ``json.dumps`` of the report (machine-readable).
+* :func:`render_html` — a standalone HTML page via a small Jinja2 template.
+"""
+from __future__ import annotations
+import json
+from dataclasses import asdict
+from typing import Any, Dict
+from jinja2 import Environment
+from .report import AssessmentReport
+def _as_dict(report: AssessmentReport) -> Dict[str, Any]:
+    """Convert *report* to a plain dict with ``edition`` as a plain value.
+    ``asdict`` leaves the ``Edition`` Enum instance in place, so it is replaced
+    by its ``value`` here to give JSON and the HTML template a plain string;
+    an edition without a ``value`` attribute is kept unchanged.
+    """
+    edition = report.edition
+    payload = asdict(report)
+    payload.update(edition=getattr(edition, "value", edition))
+    return payload
+def render_json(report: AssessmentReport) -> str:
+    """Return the whole report as JSON text, indented by 2 with sorted keys."""
+    payload = _as_dict(report)
+    return json.dumps(payload, sort_keys=True, indent=2)
+def render_text(report: AssessmentReport) -> str:
+    """Format the report as a compact plain-text summary for the terminal.
+    Sections, in order: header (dialect, edition, schema), object inventory,
+    per-table column mappings, the cost estimate and, only when the report has
+    any, the list of gaps. Lines are joined with ``\\n`` (no trailing newline).
+    """
+    heavy_rule = "=" * 60
+    light_rule = "-" * 60
+    counts = report.inventory.get("counts", {})
+    out = [
+        heavy_rule,
+        "HeliosDB Migration Assessment",
+        heavy_rule,
+        "Source dialect : {}".format(report.source_dialect),
+        "Target edition : {}".format(getattr(report.edition, "value", report.edition)),
+        "Schema         : {}".format(report.inventory.get("schema", "")),
+        "",
+        "Object inventory",
+        light_rule,
+    ]
+    # Fixed display order; kinds missing from the inventory are skipped.
+    ordered_kinds = (
+        "tables",
+        "columns",
+        "views",
+        "sequences",
+        "routines",
+        "triggers",
+        "indexes",
+        "foreign_keys",
+        "types",
+    )
+    out.extend(
+        "  {:<14}: {}".format(kind, counts[kind])
+        for kind in ordered_kinds
+        if kind in counts
+    )
+    out += ["", "Tables", light_rule]
+    for tbl in report.inventory.get("tables", []):
+        out.append(
+            "  {} ({} columns)".format(tbl.get("name"), tbl.get("column_count", 0))
+        )
+        out.extend(
+            "      {:<24} {} -> {}".format(
+                column.get("name", ""),
+                column.get("source_type", ""),
+                column.get("target_sql", ""),
+            )
+            for column in tbl.get("columns", [])
+        )
+    out += [
+        "",
+        "Estimated migration cost: {} person-days".format(report.cost_person_days),
+    ]
+    if report.gaps:
+        out += ["", "Gaps ({})".format(len(report.gaps)), light_rule]
+        out.extend("  - {}".format(gap) for gap in report.gaps)
+    out.append(heavy_rule)
+    return "\n".join(out)
+# Jinja2 source for the standalone HTML report. ``render_html`` fills it with
+# ``source_dialect``, ``edition``, ``schema``, ``cost_person_days``, ``counts``,
+# ``tables`` and ``gaps``; the "Gaps" table is emitted only when ``gaps`` is
+# non-empty.
+_HTML_TEMPLATE = """<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<title>HeliosDB Migration Assessment - {{ schema }}</title>
+<style>
+  body { font-family: -apple-system, Segoe UI, Roboto, sans-serif; margin: 2rem; }
+  h1 { font-size: 1.5rem; }
+  table { border-collapse: collapse; margin-bottom: 1.5rem; }
+  th, td { border: 1px solid #ccc; padding: 4px 10px; text-align: left; }
+  th { background: #f4f4f4; }
+  .meta td:first-child { font-weight: bold; }
+  caption { font-weight: bold; text-align: left; margin-bottom: 4px; }
+</style>
+</head>
+<body>
+<h1>HeliosDB Migration Assessment</h1>
+<table class="meta">
+  <tr><td>Source dialect</td><td>{{ source_dialect }}</td></tr>
+  <tr><td>Target edition</td><td>{{ edition }}</td></tr>
+  <tr><td>Schema</td><td>{{ schema }}</td></tr>
+  <tr><td>Estimated cost</td><td>{{ cost_person_days }} person-days</td></tr>
+</table>
+<table>
+  <caption>Object inventory</caption>
+  <tr><th>Object</th><th>Count</th></tr>
+  {% for key, value in counts.items() %}
+  <tr><td>{{ key }}</td><td>{{ value }}</td></tr>
+  {% endfor %}
+</table>
+{% for table in tables %}
+<table>
+  <caption>{{ table.name }} ({{ table.column_count }} columns)</caption>
+  <tr><th>Column</th><th>Source type</th><th>Target SQL</th><th>Nullable</th></tr>
+  {% for col in table.columns %}
+  <tr>
+    <td>{{ col.name }}</td>
+    <td>{{ col.source_type }}</td>
+    <td>{{ col.target_sql }}</td>
+    <td>{{ col.nullable }}</td>
+  </tr>
+  {% endfor %}
+</table>
+{% endfor %}
+{% if gaps %}
+<table>
+  <caption>Gaps ({{ gaps|length }})</caption>
+  <tr><th>Detail</th></tr>
+  {% for gap in gaps %}
+  <tr><td>{{ gap }}</td></tr>
+  {% endfor %}
+</table>
+{% endif %}
+</body>
+</html>
+"""
+def render_html(report: AssessmentReport) -> str:
+    """Produce the report as a self-contained HTML page.
+    The module's Jinja2 template is compiled in an autoescaping environment
+    and filled from the report's fields and its inventory.
+    """
+    page = Environment(autoescape=True).from_string(_HTML_TEMPLATE)
+    inventory = report.inventory
+    context = {
+        "source_dialect": report.source_dialect,
+        # Enum editions are shown by value; anything else is passed as-is.
+        "edition": getattr(report.edition, "value", report.edition),
+        "schema": inventory.get("schema", ""),
+        "cost_person_days": report.cost_person_days,
+        "counts": inventory.get("counts", {}),
+        "tables": inventory.get("tables", []),
+        "gaps": report.gaps,
+    }
+    return page.render(**context)

any2heliosdb/assess/report.py ADDED Viewed

@@ -0,0 +1,125 @@
+"""The assessment report — Ora2Pg ``SHOW_REPORT --estimate_cost`` analogue.
+:func:`build_report` combines:
+* the schema **inventory** (object/column counts, per-table columns),
+* **type provenance** — for every table column, what the :class:`TypeRegistry`
+  resolved the verbatim source type to, and whether that came from a default
+  mapping or a user ``DATA_TYPE`` / ``MODIFY_TYPE`` override, and
+* a coarse **migration-cost** estimate in person-days.
+The real PL/SQL translation cost is produced by the ``plsql`` module and arrives
+as ``gap_report``; here we apply only a deliberately simple placeholder
+heuristic (routines + triggers) so the report is useful before that lands.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+from ..constants import Edition
+from ..core.catalog_model import Schema
+from ..typemap.registry import TypeRegistry
+from .inventory import schema_inventory
+# Placeholder cost weights (person-days). Real PL/SQL cost replaces these once
+# the plsql module's gap report is wired in.
+_COST_PER_ROUTINE = 0.25
+_COST_PER_TRIGGER = 0.1
+@dataclass
+class AssessmentReport:
+    """Structured result of assessing one schema against a target edition.
+    Built by :func:`build_report`; the text / JSON / HTML renderers read its
+    fields directly.
+    """
+    # Source dialect as a plain string (``build_report`` stores ``registry.dialect.value``).
+    source_dialect: str
+    # Target edition the schema is assessed against.
+    edition: Edition
+    # Output of ``schema_inventory``: schema name, object counts, per-table columns.
+    inventory: Dict[str, Any]
+    # One dict per table column with keys ``table``, ``column``, ``source_type``,
+    # ``target_sql`` and ``provenance``.
+    type_provenance: List[Dict[str, Any]] = field(default_factory=list)
+    # Coarse migration-cost estimate, in person-days.
+    cost_person_days: float = 0.0
+    # Gap items normalized to plain dicts; empty when no gap report was supplied.
+    gaps: List[Dict[str, Any]] = field(default_factory=list)
+def _gaps_to_list(gap_report: Optional[Any]) -> List[Dict[str, Any]]:
+    """Turn an optional gap report into a list of plain dicts.
+    The producing ``plsql`` module is developed in parallel, so several shapes
+    are tolerated: ``None`` (no gaps), an object exposing a ``.gaps`` iterable,
+    or a bare iterable of items. A value that cannot be iterated yields an
+    empty list. Each item is normalized by ``_gap_item_to_dict``.
+    """
+    if gap_report is None:
+        return []
+    candidates = getattr(gap_report, "gaps", gap_report)
+    try:
+        stream = iter(candidates)
+    except TypeError:
+        # Not iterable: there is nothing usable to report.
+        return []
+    return [_gap_item_to_dict(entry) for entry in stream]
+def _gap_item_to_dict(item: Any) -> Dict[str, Any]:
+    """Normalize one gap item to a plain dict.
+    * a ``dict`` is shallow-copied;
+    * an object with a non-empty ``__dict__`` contributes its instance
+      attributes;
+    * a dataclass instance without ``__dict__`` (``__slots__``-based)
+      contributes its declared fields instead of being stringified;
+    * anything else becomes ``{"detail": str(item)}``.
+    Attribute values exposing ``.value`` (e.g. str-Enums such as Severity) are
+    replaced by that plain value.
+    """
+    # Imported locally: these two helpers are not among the module-level names.
+    from dataclasses import fields, is_dataclass
+    if isinstance(item, dict):
+        return dict(item)
+    attrs = getattr(item, "__dict__", None)
+    if not attrs and is_dataclass(item) and not isinstance(item, type):
+        # Slotted dataclasses carry no ``__dict__``; read the declared fields,
+        # skipping any that were never assigned.
+        attrs = {
+            f.name: getattr(item, f.name)
+            for f in fields(item)
+            if hasattr(item, f.name)
+        }
+    if attrs:
+        # Unwrap str-Enums (e.g. Severity) to their plain string value.
+        return {key: getattr(value, "value", value) for key, value in attrs.items()}
+    return {"detail": str(item)}
+def build_report(
+    schema: Schema,
+    registry: TypeRegistry,
+    edition: Edition = Edition.UNKNOWN,
+    gap_report: Optional[Any] = None,
+) -> AssessmentReport:
+    """Assess *schema* against *edition* and return an :class:`AssessmentReport`.
+    Every table column is looked up through ``registry.resolve``, keyed by its
+    verbatim ``source_type`` when one is recorded and by the column's rendered
+    target SQL otherwise; the outcome (source type -> target SQL + provenance)
+    becomes one ``type_provenance`` row. The cost estimate is the placeholder
+    heuristic: a fixed weight per routine plus a fixed weight per trigger,
+    rounded to two decimals.
+    """
+    inventory = schema_inventory(schema)
+    provenance_rows: List[Dict[str, Any]] = []
+    for tbl in schema.tables:
+        for col in tbl.columns:
+            declared = col.source_type or col.data_type.sql()
+            hit = registry.resolve(
+                declared,
+                table=tbl.name,
+                column=col.name,
+                schema=tbl.schema,
+            )
+            provenance_rows.append(
+                dict(
+                    table=tbl.fqn,
+                    column=col.name,
+                    source_type=declared,
+                    target_sql=hit.data_type.sql(),
+                    provenance=hit.provenance.value,
+                )
+            )
+    routine_cost = _COST_PER_ROUTINE * len(schema.routines)
+    trigger_cost = _COST_PER_TRIGGER * len(schema.triggers)
+    estimate = round(routine_cost + trigger_cost, 2)
+    return AssessmentReport(
+        source_dialect=registry.dialect.value,
+        edition=edition,
+        inventory=inventory,
+        type_provenance=provenance_rows,
+        cost_person_days=estimate,
+        gaps=_gaps_to_list(gap_report),
+    )

any2heliosdb/cdc/__init__.py ADDED Viewed

File without changes

any2heliosdb/cdc/engine.py ADDED Viewed

@@ -0,0 +1,143 @@
+"""CDC engine: wires registry + source capture + trail + replicat apply.
+Symmetric Extract -> trail -> Replicat so capture and apply advance on their own
+durable cursors. v1 source is Oracle SCN-watermark; the trail and replicat are
+source-agnostic, so log-based sources (v2) and HeliosDB-as-source drop in here.
+"""
+from __future__ import annotations
+import os
+import re
+from typing import Dict, List
+from ..errors import Any2HeliosError
+from .registry import CdcRegistry, Extract
+from .replicat import Replicat
+from .sources.oracle_scn import OracleScnSource
+from .trail import Trail
+# HeliosDB-Nano resolved INSERT ... ON CONFLICT DO UPDATE's quoted SET target in
+# v3.58.2 (#34), and v3.58.3 accepts E'...' escaped string literals as values so
+# the replicat's bytea ON CONFLICT upsert (psycopg escapes bytea params as
+# E'\\x..') works. Require 3.58.3 so keyed CDC apply is correct for binary data.
+_NANO_MIN_CDC_VERSION = (3, 58, 3)
+def _version_tuple(version: str):  # type: ignore[no-untyped-def]
+    """Parse the first ``X.Y.Z`` found in a version banner.
+    Returns a 3-tuple of ints, or ``None`` when *version* is empty/``None`` or
+    contains no dotted triple.
+    """
+    found = re.search(r"(\d+)\.(\d+)\.(\d+)", version or "")
+    if not found:
+        return None
+    major, minor, patch = found.groups()
+    return (int(major), int(minor), int(patch))
+def _registry_path(cfg) -> str:  # type: ignore[no-untyped-def]
+    """Path of the sqlite CDC registry (``cdc.db``) in the configured output dir."""
+    output_dir = cfg.options.output_dir
+    return os.path.join(output_dir, "cdc.db")
+def _trail_dir(cfg, name: str) -> str:  # type: ignore[no-untyped-def]
+    """Directory ``<output_dir>/trail/<name>`` holding extract *name*'s trail."""
+    output_dir = cfg.options.output_dir
+    return os.path.join(output_dir, "trail", name)
+def _binlog_pos_file(cfg, name: str) -> str:  # type: ignore[no-untyped-def]
+    """Path of the ``binlog.pos`` cursor file in extract *name*'s trail directory."""
+    trail_dir = _trail_dir(cfg, name)
+    return os.path.join(trail_dir, "binlog.pos")
+def run_extract(cfg, name: str) -> Dict[str, object]:  # type: ignore[no-untyped-def]
+    """Capture source changes for extract *name* and append them to its trail.
+    Introspects the source schema, registers (or refreshes) the extract in the
+    registry, then captures according to the source dialect:
+    * MySQL — reads the binlog starting from the coordinate stored in the
+      extract's ``binlog.pos`` file (an empty coordinate when the file is
+      absent) and stores the new coordinate back;
+    * otherwise — Oracle SCN-watermark capture starting from the registry
+      watermark, which is then advanced.
+    Returns a dict with ``captured``, ``watermark``, ``since``, ``skipped`` and
+    ``mode`` (``"binlog"`` or ``"scn"``).
+    Raises :class:`Any2HeliosError` if the extract row cannot be read back
+    after registration.
+    """
+    from ..config.store import build_source_adapter
+    from ..constants import SourceDialect
+    reg = CdcRegistry(_registry_path(cfg))
+    try:
+        # Built and connected inside the outer ``try`` so that a failure here
+        # still closes the registry handle.
+        adapter = build_source_adapter(cfg)
+        adapter.connect()
+        try:
+            schema_ir = adapter.introspect_schema(cfg.source.schema)
+            schema_name = cfg.source.schema or schema_ir.name
+            reg.register(name, schema_name, [t.name for t in schema_ir.tables])
+            ext = reg.get(name)
+            if ext is None:
+                # Explicit check rather than ``assert``, which ``python -O`` strips.
+                raise Any2HeliosError(
+                    "extract '{}' could not be read back after registration".format(name))
+            trail = Trail(_trail_dir(cfg, name))
+            if cfg.source.dialect is SourceDialect.MYSQL:
+                # Log-based capture: real I/U/D from the binlog. Cursor is the binlog
+                # coordinate, persisted in a small pos file alongside the trail.
+                from .sources.mysql_binlog import MySqlBinlogSource
+                posf = _binlog_pos_file(cfg, name)
+                since = ""
+                if os.path.exists(posf):
+                    with open(posf) as f:
+                        since = f.read().strip()
+                source = MySqlBinlogSource(cfg.source.to_dsn(), schema_name, schema_ir.tables)
+                records, new_pos = source.capture(since)
+                captured = trail.append(records)
+                # Write-then-rename so an interrupted write never leaves a
+                # truncated pos file, which the next run would read back as an
+                # empty coordinate.
+                tmp_posf = posf + ".tmp"
+                with open(tmp_posf, "w") as f:
+                    f.write(new_pos)
+                os.replace(tmp_posf, posf)
+                return {"captured": captured, "watermark": new_pos,
+                        "since": since or "(current)", "skipped": [], "mode": "binlog"}
+            # Default: Oracle SCN-watermark capture.
+            source = OracleScnSource(adapter, schema_name, schema_ir.tables)
+            records, new_watermark, skipped = source.capture(ext.watermark)
+            captured = trail.append(records)
+            reg.set_watermark(name, new_watermark)
+            return {"captured": captured, "watermark": new_watermark,
+                    "since": ext.watermark, "skipped": skipped, "mode": "scn"}
+        finally:
+            adapter.close()
+    finally:
+        # Separate ``finally`` so the registry closes even if ``adapter.close()`` raises.
+        reg.close()
+def run_replicat(cfg, name: str, reconcile_deletes: bool = True) -> Dict[str, object]:  # type: ignore[no-untyped-def]
+    """Apply extract *name*'s unapplied trail records to the target.
+    Reads the trail from the extract's apply cursor, applies the records
+    through a :class:`Replicat`, advances the cursor and, when
+    *reconcile_deletes* is true, reconciles deletes against the source.
+    Returns a dict with ``applied``, ``deleted``, ``cursor``, ``read`` and
+    ``warnings``.
+    Raises :class:`Any2HeliosError` when the extract does not exist, or when
+    the target is HeliosDB-Nano older than the minimum CDC version (or its
+    version cannot be parsed).
+    """
+    from ..config.store import build_source_adapter, build_target_driver
+    from ..constants import Edition
+    reg = CdcRegistry(_registry_path(cfg))
+    try:
+        ext = reg.get(name)
+        if ext is None:
+            raise Any2HeliosError("no such extract '{}'; run `a2h extract {}` first".format(name, name))
+        # Keep the source open: it supplies the apply-side schema (PKs/columns) and,
+        # for delete reconciliation, the current key set.
+        adapter = build_source_adapter(cfg)
+        adapter.connect()
+        try:
+            # Built inside the adapter's ``try`` so that a failure to build or
+            # connect the target still closes the already-open source.
+            target = build_target_driver(cfg)
+            target.connect()
+            try:
+                # Gate the apply on a live capability probe: refuse editions whose
+                # keyed upsert can't run, with a clear message instead of a cryptic
+                # mid-apply SQL error.
+                caps = target.probe_capabilities()
+                if caps.edition is Edition.NANO:
+                    ver = _version_tuple(caps.server_version)
+                    if ver is None or ver < _NANO_MIN_CDC_VERSION:
+                        raise Any2HeliosError(
+                            "CDC apply (replicat) on HeliosDB-Nano requires >= {}: before "
+                            "that, INSERT ... ON CONFLICT DO UPDATE couldn't resolve a quoted "
+                            "SET target and silently corrupted keyed upserts (#34). Detected "
+                            "Nano version {!r}. Upgrade Nano, or use `a2h migrate` for a "
+                            "one-shot load.".format(
+                                ".".join(map(str, _NANO_MIN_CDC_VERSION)),
+                                caps.server_version or "unknown"))
+                schema_ir = adapter.introspect_schema(ext.schema)
+                rep = Replicat(target, schema_ir, cfg.options.preserve_case)
+                records, new_cursor = Trail(_trail_dir(cfg, name)).read(ext.apply_cursor)
+                applied, warnings = rep.apply(records)
+                reg.set_apply_cursor(name, new_cursor)
+                deleted = 0
+                if reconcile_deletes:
+                    deleted, dwarn = rep.reconcile_deletes(adapter)
+                    warnings = warnings + dwarn
+                return {"applied": applied, "deleted": deleted, "cursor": new_cursor,
+                        "read": len(records), "warnings": warnings}
+            finally:
+                target.close()
+        finally:
+            # Own ``finally`` so the source closes even if ``target.close()`` raises.
+            adapter.close()
+    finally:
+        reg.close()
+def list_extracts(cfg) -> List[Extract]:  # type: ignore[no-untyped-def]
+    """Return every extract recorded in the CDC registry for *cfg*.
+    The registry is opened only for the duration of the call and is closed
+    again even if listing fails.
+    """
+    registry = CdcRegistry(_registry_path(cfg))
+    try:
+        extracts = registry.list()
+    finally:
+        registry.close()
+    return extracts

any2heliosdb/cdc/registry.py ADDED Viewed

@@ -0,0 +1,76 @@
+"""Persistent catalog of named CDC extracts (sqlite).
+Each extract row carries its capture **watermark** (highest SCN captured) and
+the replicat **apply cursor** (trail lines already applied), so capture and
+apply advance independently and survive process restarts.
+"""
+from __future__ import annotations
+import os
+import sqlite3
+from dataclasses import dataclass
+from typing import List, Optional
+@dataclass
+class Extract:
+    """One row of the ``extracts`` table, as returned by :class:`CdcRegistry`."""
+    # Extract name; primary key of the ``extracts`` table.
+    name: str
+    # Source schema the extract was registered for.
+    schema: str
+    # Table names, split from the comma-joined ``tables_csv`` column.
+    tables: List[str]
+    # Capture watermark (highest SCN captured); column default is 0.
+    watermark: int
+    # Replicat apply cursor (trail lines already applied); column default is 0.
+    apply_cursor: int
+    # 'registered' by default; set to 'capturing' / 'applying' when the
+    # watermark / apply cursor is updated.
+    state: str
+class CdcRegistry:
+    """sqlite-backed store of named CDC extracts and their two cursors."""
+    # Shared projection so ``get`` and ``list`` read the columns in one order.
+    _SELECT = (
+        "SELECT name, schema, tables_csv, watermark, apply_cursor, state "
+        "FROM extracts"
+    )
+    def __init__(self, path: str) -> None:
+        """Open the registry database at *path*, creating directory and table if needed."""
+        parent_dir = os.path.dirname(os.path.abspath(path))
+        os.makedirs(parent_dir, exist_ok=True)
+        self._db = db = sqlite3.connect(path)
+        db.execute("PRAGMA journal_mode=WAL")
+        db.execute(
+            "CREATE TABLE IF NOT EXISTS extracts ("
+            "  name TEXT PRIMARY KEY,"
+            "  schema TEXT,"
+            "  tables_csv TEXT,"
+            "  watermark INTEGER NOT NULL DEFAULT 0,"
+            "  apply_cursor INTEGER NOT NULL DEFAULT 0,"
+            "  state TEXT NOT NULL DEFAULT 'registered')"
+        )
+        db.commit()
+    @staticmethod
+    def _to_extract(row) -> Extract:
+        """Build an :class:`Extract` from one row selected with ``_SELECT``."""
+        name, schema, tables_csv, watermark, apply_cursor, state = row
+        # A NULL/empty csv yields no tables; empty fragments are dropped.
+        tables = [t for t in (tables_csv or "").split(",") if t]
+        return Extract(name, schema, tables, int(watermark), int(apply_cursor), state)
+    def register(self, name: str, schema: str, tables: List[str]) -> None:
+        """Insert the extract, or refresh schema and table set if it already exists.
+        An existing row keeps its watermark, apply cursor and state.
+        """
+        tables_csv = ",".join(tables)
+        self._db.execute(
+            "INSERT INTO extracts (name, schema, tables_csv) VALUES (?,?,?) "
+            "ON CONFLICT(name) DO UPDATE SET schema=excluded.schema, tables_csv=excluded.tables_csv",
+            (name, schema, tables_csv),
+        )
+        self._db.commit()
+    def get(self, name: str) -> Optional[Extract]:
+        """Return the extract called *name*, or ``None`` when there is none."""
+        cursor = self._db.execute(self._SELECT + " WHERE name=?", (name,))
+        row = cursor.fetchone()
+        if row is None:
+            return None
+        return self._to_extract(row)
+    def list(self) -> List[Extract]:
+        """Return all extracts, ordered by name."""
+        rows = self._db.execute(self._SELECT + " ORDER BY name").fetchall()
+        return [self._to_extract(row) for row in rows]
+    def set_watermark(self, name: str, scn: int) -> None:
+        """Store *scn* as the capture watermark and mark the extract 'capturing'."""
+        params = (scn, name)
+        self._db.execute("UPDATE extracts SET watermark=?, state='capturing' WHERE name=?", params)
+        self._db.commit()
+    def set_apply_cursor(self, name: str, cursor: int) -> None:
+        """Store *cursor* as the apply cursor and mark the extract 'applying'."""
+        params = (cursor, name)
+        self._db.execute("UPDATE extracts SET apply_cursor=?, state='applying' WHERE name=?", params)
+        self._db.commit()
+    def close(self) -> None:
+        """Close the underlying sqlite connection."""
+        self._db.close()