any2heliosdb 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. any2heliosdb/__init__.py +16 -0
  2. any2heliosdb/__main__.py +7 -0
  3. any2heliosdb/assess/__init__.py +28 -0
  4. any2heliosdb/assess/inventory.py +78 -0
  5. any2heliosdb/assess/render.py +161 -0
  6. any2heliosdb/assess/report.py +125 -0
  7. any2heliosdb/cdc/__init__.py +0 -0
  8. any2heliosdb/cdc/engine.py +143 -0
  9. any2heliosdb/cdc/registry.py +76 -0
  10. any2heliosdb/cdc/replicat.py +109 -0
  11. any2heliosdb/cdc/sinks/__init__.py +0 -0
  12. any2heliosdb/cdc/sources/__init__.py +0 -0
  13. any2heliosdb/cdc/sources/mysql_binlog.py +193 -0
  14. any2heliosdb/cdc/sources/oracle_scn.py +43 -0
  15. any2heliosdb/cdc/trail.py +51 -0
  16. any2heliosdb/chunking/__init__.py +0 -0
  17. any2heliosdb/chunking/pk_range.py +65 -0
  18. any2heliosdb/cli.py +489 -0
  19. any2heliosdb/config/__init__.py +0 -0
  20. any2heliosdb/config/model.py +85 -0
  21. any2heliosdb/config/store.py +146 -0
  22. any2heliosdb/config/wizard.py +119 -0
  23. any2heliosdb/constants.py +148 -0
  24. any2heliosdb/core/__init__.py +0 -0
  25. any2heliosdb/core/catalog_model.py +367 -0
  26. any2heliosdb/core/change_record.py +86 -0
  27. any2heliosdb/core/identifiers.py +80 -0
  28. any2heliosdb/core/loader.py +172 -0
  29. any2heliosdb/core/manifest.py +304 -0
  30. any2heliosdb/core/orchestrator.py +333 -0
  31. any2heliosdb/emit/__init__.py +0 -0
  32. any2heliosdb/emit/ddl.py +137 -0
  33. any2heliosdb/emit/mysql_ddl.py +195 -0
  34. any2heliosdb/emit/oracle_ddl.py +80 -0
  35. any2heliosdb/errors.py +51 -0
  36. any2heliosdb/geom/__init__.py +0 -0
  37. any2heliosdb/mcp/__init__.py +45 -0
  38. any2heliosdb/mcp/auth.py +179 -0
  39. any2heliosdb/mcp/protocol.py +141 -0
  40. any2heliosdb/mcp/server.py +222 -0
  41. any2heliosdb/mcp/tools.py +554 -0
  42. any2heliosdb/monitor/__init__.py +12 -0
  43. any2heliosdb/monitor/live.py +240 -0
  44. any2heliosdb/plsql/__init__.py +19 -0
  45. any2heliosdb/plsql/cost.py +47 -0
  46. any2heliosdb/plsql/gap.py +125 -0
  47. any2heliosdb/plsql/rewrite.py +351 -0
  48. any2heliosdb/sources/__init__.py +0 -0
  49. any2heliosdb/sources/base.py +81 -0
  50. any2heliosdb/sources/mssql/__init__.py +0 -0
  51. any2heliosdb/sources/mssql/adapter.py +429 -0
  52. any2heliosdb/sources/mysql/__init__.py +0 -0
  53. any2heliosdb/sources/mysql/adapter.py +237 -0
  54. any2heliosdb/sources/oracle/__init__.py +0 -0
  55. any2heliosdb/sources/oracle/adapter.py +309 -0
  56. any2heliosdb/sources/postgres/__init__.py +0 -0
  57. any2heliosdb/sources/postgres/adapter.py +608 -0
  58. any2heliosdb/target/__init__.py +0 -0
  59. any2heliosdb/target/base.py +196 -0
  60. any2heliosdb/target/capability.py +178 -0
  61. any2heliosdb/target/copy_codec.py +88 -0
  62. any2heliosdb/target/mysql_driver.py +239 -0
  63. any2heliosdb/target/native_driver.py +205 -0
  64. any2heliosdb/target/psycopg_driver.py +288 -0
  65. any2heliosdb/typemap/__init__.py +0 -0
  66. any2heliosdb/typemap/defaults.py +251 -0
  67. any2heliosdb/typemap/registry.py +83 -0
  68. any2heliosdb/validate/__init__.py +17 -0
  69. any2heliosdb/validate/counts.py +56 -0
  70. any2heliosdb/validate/data.py +244 -0
  71. any2heliosdb/validate/model.py +60 -0
  72. any2heliosdb/validate/structure.py +52 -0
  73. any2heliosdb-0.9.1.dist-info/METADATA +319 -0
  74. any2heliosdb-0.9.1.dist-info/RECORD +77 -0
  75. any2heliosdb-0.9.1.dist-info/WHEEL +4 -0
  76. any2heliosdb-0.9.1.dist-info/entry_points.txt +3 -0
  77. any2heliosdb-0.9.1.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,16 @@
1
+ """Any2HeliosDB — migrate Oracle / MySQL / PostgreSQL / SQL Server into HeliosDB or stock PostgreSQL.
2
+
3
+ A modern, Python successor to Ora2Pg, retargeted at HeliosDB (Lite, Full, and
4
+ — via the portable psycopg/PG-wire path — Nano). The guiding principle is to
5
+ prefer fixing/extending the target database over carrying translation logic in
6
+ the tool, so the fork stays thin. Every incompatibility the tool works around is
7
+ also emitted as a structured target-gap report.
8
+
9
+ Importing this package is side-effect free and does not pull in any database
10
+ driver; heavy imports (psycopg, oracledb, …) are deferred to the modules that
11
+ actually open connections, so the pure-logic layers stay unit-testable without
12
+ the drivers installed.
13
+ """
14
+
15
+ __version__ = "0.9.1"
16
+ __all__ = ["__version__"]
@@ -0,0 +1,7 @@
1
+ """Enable ``python -m any2heliosdb``."""
2
+ from __future__ import annotations
3
+
4
+ from .cli import main
5
+
6
+ if __name__ == "__main__":
7
+ main()
@@ -0,0 +1,28 @@
1
+ """Assessment module — the SHOW_* / SHOW_REPORT surface.
2
+
3
+ Mirrors Ora2Pg's ``SHOW_VERSION`` / ``SHOW_SCHEMA`` / ``SHOW_TABLE`` /
4
+ ``SHOW_COLUMN`` inspection and ``SHOW_REPORT --estimate_cost`` migration-cost
5
+ estimate, computed against the canonical IR
6
+ (:mod:`any2heliosdb.core.catalog_model`) instead of a live catalog so the same
7
+ report can be produced offline from an introspected schema.
8
+
9
+ Public surface:
10
+
11
+ * :func:`~any2heliosdb.assess.inventory.schema_inventory` — object/column counts.
12
+ * :class:`~any2heliosdb.assess.report.AssessmentReport` + :func:`build_report`.
13
+ * :mod:`~any2heliosdb.assess.render` — text / JSON / HTML renderers.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ from .inventory import schema_inventory
18
+ from .report import AssessmentReport, build_report
19
+ from .render import render_html, render_json, render_text
20
+
21
+ __all__ = [
22
+ "schema_inventory",
23
+ "AssessmentReport",
24
+ "build_report",
25
+ "render_text",
26
+ "render_json",
27
+ "render_html",
28
+ ]
@@ -0,0 +1,78 @@
1
+ """Schema object inventory — the SHOW_SCHEMA / SHOW_TABLE / SHOW_COLUMN counts.
2
+
3
+ Walks the canonical IR (:class:`~any2heliosdb.core.catalog_model.Schema`) and
4
+ returns a plain, JSON-serializable ``dict`` of object counts plus a per-table
5
+ column listing. Pure function, no I/O — the assessment report and the renderers
6
+ consume this directly.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from typing import Dict, List
11
+
12
+ from ..core.catalog_model import Schema
13
+
14
+
15
+ def _column_entry(column) -> Dict[str, object]:
16
+ """One column's assessment view: name, verbatim source type, target SQL."""
17
+ source_type = column.source_type or column.data_type.sql()
18
+ return {
19
+ "name": column.name,
20
+ "source_type": source_type,
21
+ "target_sql": column.data_type.sql(),
22
+ "nullable": bool(column.nullable),
23
+ }
24
+
25
+
26
def schema_inventory(schema: Schema) -> Dict[str, object]:
    """Summarize *schema* as object counts plus a per-table column listing.

    The result is a flat, JSON-friendly mapping with three keys:

    * ``"schema"`` — the schema name;
    * ``"counts"`` — totals for tables, columns, views, sequences, routines,
      triggers, indexes, foreign keys and types (indexes and foreign keys are
      summed across all tables);
    * ``"tables"`` — one entry per table with ``name``, ``schema``,
      ``column_count`` and the list of column entries.
    """
    table_entries: List[Dict[str, object]] = []
    column_total = 0
    index_total = 0
    fk_total = 0

    for tbl in schema.tables:
        column_views = [_column_entry(col) for col in tbl.columns]
        column_total += len(column_views)
        index_total += len(tbl.indexes)
        fk_total += len(tbl.foreign_keys)
        table_entries.append(
            dict(
                name=tbl.name,
                schema=tbl.schema,
                column_count=len(column_views),
                columns=column_views,
            )
        )

    totals: Dict[str, int] = dict(
        tables=len(schema.tables),
        columns=column_total,
        views=len(schema.views),
        sequences=len(schema.sequences),
        routines=len(schema.routines),
        triggers=len(schema.triggers),
        indexes=index_total,
        foreign_keys=fk_total,
        types=len(schema.types),
    )

    return dict(schema=schema.name, counts=totals, tables=table_entries)
@@ -0,0 +1,161 @@
1
+ """Renderers for an :class:`~any2heliosdb.assess.report.AssessmentReport`.
2
+
3
+ Three surfaces, mirroring Ora2Pg's report outputs:
4
+
5
+ * :func:`render_text` — a compact plain-text summary for the terminal.
6
+ * :func:`render_json` — ``json.dumps`` of the report (machine-readable).
7
+ * :func:`render_html` — a standalone HTML page via a small Jinja2 template.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from dataclasses import asdict
13
+ from typing import Any, Dict
14
+
15
+ from jinja2 import Environment
16
+
17
+ from .report import AssessmentReport
18
+
19
+
20
+ def _as_dict(report: AssessmentReport) -> Dict[str, Any]:
21
+ """Report as a plain dict. ``str``-Enums (Edition) serialize as their value."""
22
+ data = asdict(report)
23
+ # ``asdict`` keeps the Enum instance; normalize to its plain string value so
24
+ # both JSON and the HTML template see a string.
25
+ data["edition"] = getattr(report.edition, "value", report.edition)
26
+ return data
27
+
28
+
29
def render_json(report: AssessmentReport) -> str:
    """Return the whole report as key-sorted JSON indented by two spaces."""
    payload = _as_dict(report)
    return json.dumps(payload, sort_keys=True, indent=2)
32
+
33
+
34
def render_text(report: AssessmentReport) -> str:
    """Return a compact plain-text summary of *report* for the terminal.

    Sections, in order: a banner with source dialect, target edition and
    schema; the object inventory (only the known count keys that are present,
    in a fixed order); each table with its columns; the cost estimate; and,
    when the report has any, the list of gaps.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60
    inventory = report.inventory
    counts = inventory.get("counts", {})
    edition = getattr(report.edition, "value", report.edition)
    count_order = (
        "tables",
        "columns",
        "views",
        "sequences",
        "routines",
        "triggers",
        "indexes",
        "foreign_keys",
        "types",
    )

    out = [
        heavy_rule,
        "HeliosDB Migration Assessment",
        heavy_rule,
        f"Source dialect : {report.source_dialect}",
        f"Target edition : {edition}",
        f"Schema : {inventory.get('schema', '')}",
        "",
        "Object inventory",
        light_rule,
    ]
    # Keys outside the fixed order are deliberately not printed.
    out.extend(
        f" {kind:<14}: {counts[kind]}" for kind in count_order if kind in counts
    )

    out += ["", "Tables", light_rule]
    for tbl in inventory.get("tables", []):
        out.append(f" {tbl.get('name')} ({tbl.get('column_count', 0)} columns)")
        for column in tbl.get("columns", []):
            name = column.get("name", "")
            source_type = column.get("source_type", "")
            target_sql = column.get("target_sql", "")
            out.append(f" {name:<24} {source_type} -> {target_sql}")

    out += ["", f"Estimated migration cost: {report.cost_person_days} person-days"]

    if report.gaps:
        out += ["", f"Gaps ({len(report.gaps)})", light_rule]
        out.extend(f" - {gap}" for gap in report.gaps)

    out.append(heavy_rule)
    return "\n".join(out)
85
+
86
+
87
+ _HTML_TEMPLATE = """<!DOCTYPE html>
88
+ <html lang="en">
89
+ <head>
90
+ <meta charset="utf-8">
91
+ <title>HeliosDB Migration Assessment - {{ schema }}</title>
92
+ <style>
93
+ body { font-family: -apple-system, Segoe UI, Roboto, sans-serif; margin: 2rem; }
94
+ h1 { font-size: 1.5rem; }
95
+ table { border-collapse: collapse; margin-bottom: 1.5rem; }
96
+ th, td { border: 1px solid #ccc; padding: 4px 10px; text-align: left; }
97
+ th { background: #f4f4f4; }
98
+ .meta td:first-child { font-weight: bold; }
99
+ caption { font-weight: bold; text-align: left; margin-bottom: 4px; }
100
+ </style>
101
+ </head>
102
+ <body>
103
+ <h1>HeliosDB Migration Assessment</h1>
104
+ <table class="meta">
105
+ <tr><td>Source dialect</td><td>{{ source_dialect }}</td></tr>
106
+ <tr><td>Target edition</td><td>{{ edition }}</td></tr>
107
+ <tr><td>Schema</td><td>{{ schema }}</td></tr>
108
+ <tr><td>Estimated cost</td><td>{{ cost_person_days }} person-days</td></tr>
109
+ </table>
110
+
111
+ <table>
112
+ <caption>Object inventory</caption>
113
+ <tr><th>Object</th><th>Count</th></tr>
114
+ {% for key, value in counts.items() %}
115
+ <tr><td>{{ key }}</td><td>{{ value }}</td></tr>
116
+ {% endfor %}
117
+ </table>
118
+
119
+ {% for table in tables %}
120
+ <table>
121
+ <caption>{{ table.name }} ({{ table.column_count }} columns)</caption>
122
+ <tr><th>Column</th><th>Source type</th><th>Target SQL</th><th>Nullable</th></tr>
123
+ {% for col in table.columns %}
124
+ <tr>
125
+ <td>{{ col.name }}</td>
126
+ <td>{{ col.source_type }}</td>
127
+ <td>{{ col.target_sql }}</td>
128
+ <td>{{ col.nullable }}</td>
129
+ </tr>
130
+ {% endfor %}
131
+ </table>
132
+ {% endfor %}
133
+
134
+ {% if gaps %}
135
+ <table>
136
+ <caption>Gaps ({{ gaps|length }})</caption>
137
+ <tr><th>Detail</th></tr>
138
+ {% for gap in gaps %}
139
+ <tr><td>{{ gap }}</td></tr>
140
+ {% endfor %}
141
+ </table>
142
+ {% endif %}
143
+ </body>
144
+ </html>
145
+ """
146
+
147
+
148
def render_html(report: AssessmentReport) -> str:
    """Return the report as a standalone HTML page.

    The page is produced from the module-level ``_HTML_TEMPLATE`` using a
    Jinja2 environment with autoescaping enabled.
    """
    inv = report.inventory
    context = {
        "source_dialect": report.source_dialect,
        "edition": getattr(report.edition, "value", report.edition),
        "schema": inv.get("schema", ""),
        "cost_person_days": report.cost_person_days,
        "counts": inv.get("counts", {}),
        "tables": inv.get("tables", []),
        "gaps": report.gaps,
    }
    page = Environment(autoescape=True).from_string(_HTML_TEMPLATE)
    return page.render(**context)
@@ -0,0 +1,125 @@
1
+ """The assessment report — Ora2Pg ``SHOW_REPORT --estimate_cost`` analogue.
2
+
3
+ :func:`build_report` combines:
4
+
5
+ * the schema **inventory** (object/column counts, per-table columns),
6
+ * **type provenance** — for every table column, what the :class:`TypeRegistry`
7
+ resolved the verbatim source type to, and whether that came from a default
8
+ mapping or a user ``DATA_TYPE`` / ``MODIFY_TYPE`` override, and
9
+ * a coarse **migration-cost** estimate in person-days.
10
+
11
+ The real PL/SQL translation cost is produced by the ``plsql`` module and arrives
12
+ as ``gap_report``; here we apply only a deliberately simple placeholder
13
+ heuristic (routines + triggers) so the report is useful before that lands.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass, field
18
+ from typing import Any, Dict, List, Optional
19
+
20
+ from ..constants import Edition
21
+ from ..core.catalog_model import Schema
22
+ from ..typemap.registry import TypeRegistry
23
+ from .inventory import schema_inventory
24
+
25
+ # Placeholder cost weights (person-days). Real PL/SQL cost replaces these once
26
+ # the plsql module's gap report is wired in.
27
+ _COST_PER_ROUTINE = 0.25
28
+ _COST_PER_TRIGGER = 0.1
29
+
30
+
31
@dataclass
class AssessmentReport:
    """Structured result of assessing one schema against a target edition.

    Instances are produced by ``build_report`` and consumed by the renderers;
    the list-valued fields default to fresh empty lists per instance.
    """

    # Name of the source dialect (``build_report`` stores the registry's
    # dialect value here).
    source_dialect: str
    # Target edition the schema is assessed against.
    edition: Edition
    # Output of ``schema_inventory``: schema name, object counts, table list.
    inventory: Dict[str, Any]
    # One dict per table column: table, column, source_type, target_sql,
    # provenance.
    type_provenance: List[Dict[str, Any]] = field(default_factory=list)
    # Coarse migration-cost estimate, in person-days.
    cost_person_days: float = 0.0
    # Gap entries, each normalized to a dict.
    gaps: List[Dict[str, Any]] = field(default_factory=list)
41
+
42
+
43
+ def _gaps_to_list(gap_report: Optional[Any]) -> List[Dict[str, Any]]:
44
+ """Coerce an optional gap report into a JSON-serializable list of dicts.
45
+
46
+ Tolerant of shapes because the producing ``plsql`` module is developed in
47
+ parallel: accepts ``None``, an object exposing a ``.gaps`` iterable, or a
48
+ bare iterable. Each item is normalized to a dict; items already dict-like or
49
+ dataclass-like are passed through, others are stringified.
50
+ """
51
+ if gap_report is None:
52
+ return []
53
+ items = getattr(gap_report, "gaps", gap_report)
54
+ out: List[Dict[str, Any]] = []
55
+ try:
56
+ iterator = iter(items)
57
+ except TypeError:
58
+ return out
59
+ for item in iterator:
60
+ out.append(_gap_item_to_dict(item))
61
+ return out
62
+
63
+
64
+ def _gap_item_to_dict(item: Any) -> Dict[str, Any]:
65
+ if isinstance(item, dict):
66
+ return dict(item)
67
+ as_dict = getattr(item, "__dict__", None)
68
+ if as_dict:
69
+ result: Dict[str, Any] = {}
70
+ for key, value in as_dict.items():
71
+ # Unwrap str-Enums (e.g. Severity) to their plain string value.
72
+ result[key] = getattr(value, "value", value)
73
+ return result
74
+ return {"detail": str(item)}
75
+
76
+
77
def build_report(
    schema: Schema,
    registry: TypeRegistry,
    edition: Edition = Edition.UNKNOWN,
    gap_report: Optional[Any] = None,
) -> AssessmentReport:
    """Assemble the :class:`AssessmentReport` for *schema* against *edition*.

    Each table column is passed through ``registry.resolve``, keyed by the
    verbatim ``source_type`` when the column has one and by its rendered
    target SQL otherwise. The resolved target SQL and provenance are recorded
    in ``type_provenance``. The cost estimate is the placeholder heuristic:
    a fixed weight per routine plus a fixed weight per trigger, rounded to
    two decimals.
    """
    inventory = schema_inventory(schema)

    provenance_rows: List[Dict[str, Any]] = []
    for tbl in schema.tables:
        for col in tbl.columns:
            verbatim = col.source_type
            if not verbatim:
                verbatim = col.data_type.sql()
            mapping = registry.resolve(
                verbatim,
                table=tbl.name,
                column=col.name,
                schema=tbl.schema,
            )
            provenance_rows.append(
                dict(
                    table=tbl.fqn,
                    column=col.name,
                    source_type=verbatim,
                    target_sql=mapping.data_type.sql(),
                    provenance=mapping.provenance.value,
                )
            )

    routine_cost = _COST_PER_ROUTINE * len(schema.routines)
    trigger_cost = _COST_PER_TRIGGER * len(schema.triggers)
    estimate = round(routine_cost + trigger_cost, 2)

    return AssessmentReport(
        source_dialect=registry.dialect.value,
        edition=edition,
        inventory=inventory,
        type_provenance=provenance_rows,
        cost_person_days=estimate,
        gaps=_gaps_to_list(gap_report),
    )
File without changes
@@ -0,0 +1,143 @@
1
+ """CDC engine: wires registry + source capture + trail + replicat apply.
2
+
3
+ Symmetric Extract -> trail -> Replicat so capture and apply advance on their own
4
+ durable cursors. v1 source is Oracle SCN-watermark; the trail and replicat are
5
+ source-agnostic, so log-based sources (v2) and HeliosDB-as-source drop in here.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import re
11
+ from typing import Dict, List
12
+
13
+ from ..errors import Any2HeliosError
14
+ from .registry import CdcRegistry, Extract
15
+ from .replicat import Replicat
16
+ from .sources.oracle_scn import OracleScnSource
17
+ from .trail import Trail
18
+
19
+ # HeliosDB-Nano resolved INSERT ... ON CONFLICT DO UPDATE's quoted SET target in
20
+ # v3.58.2 (#34), and v3.58.3 accepts E'...' escaped string literals as values so
21
+ # the replicat's bytea ON CONFLICT upsert (psycopg escapes bytea params as
22
+ # E'\\x..') works. Require 3.58.3 so keyed CDC apply is correct for binary data.
23
+ _NANO_MIN_CDC_VERSION = (3, 58, 3)
24
+
25
+
26
+ def _version_tuple(version: str): # type: ignore[no-untyped-def]
27
+ """First X.Y.Z in a HeliosDB version banner as an int tuple, else None."""
28
+ m = re.search(r"(\d+)\.(\d+)\.(\d+)", version or "")
29
+ return tuple(int(g) for g in m.groups()) if m else None
30
+
31
+
32
+ def _registry_path(cfg) -> str: # type: ignore[no-untyped-def]
33
+ return os.path.join(cfg.options.output_dir, "cdc.db")
34
+
35
+
36
+ def _trail_dir(cfg, name: str) -> str: # type: ignore[no-untyped-def]
37
+ return os.path.join(cfg.options.output_dir, "trail", name)
38
+
39
+
40
def _binlog_pos_file(cfg, name: str) -> str:  # type: ignore[no-untyped-def]
    """Return the path of the ``binlog.pos`` file inside extract *name*'s trail directory."""
    trail_dir = _trail_dir(cfg, name)
    return os.path.join(trail_dir, "binlog.pos")
42
+
43
+
44
def _load_binlog_pos(path: str) -> str:
    """Return the saved binlog coordinate at *path*, or ``""`` if no file exists."""
    try:
        with open(path, encoding="utf-8") as handle:
            return handle.read().strip()
    except FileNotFoundError:
        return ""


def _store_binlog_pos(path: str, position: str) -> None:
    """Persist *position* at *path* by writing a sibling file and renaming it.

    The rename keeps a crash mid-write from leaving a truncated cursor file.
    """
    scratch = path + ".tmp"
    with open(scratch, "w", encoding="utf-8") as handle:
        handle.write(position)
    os.replace(scratch, path)


def run_extract(cfg, name: str) -> Dict[str, object]:  # type: ignore[no-untyped-def]
    """Run one capture pass for extract *name* and append it to the trail.

    The source schema is introspected and the extract is (re-)registered with
    its current table list. Captured records are appended to the extract's
    trail, then the capture cursor is advanced:

    * MySQL sources use binlog capture; the cursor is the binlog coordinate,
      kept in a ``binlog.pos`` file next to the trail.
    * Every other dialect takes the Oracle SCN-watermark path; the cursor is
      the watermark stored in the registry.

    Returns a dict with keys ``captured``, ``watermark``, ``since``,
    ``skipped`` and ``mode`` (``"binlog"`` or ``"scn"``).

    Raises:
        Any2HeliosError: if the extract cannot be read back after registration.
    """
    from ..config.store import build_source_adapter
    from ..constants import SourceDialect

    reg = CdcRegistry(_registry_path(cfg))
    try:
        # Build/connect inside the registry guard so a failure here cannot
        # leak the registry connection.
        adapter = build_source_adapter(cfg)
        adapter.connect()
        try:
            schema_ir = adapter.introspect_schema(cfg.source.schema)
            schema_name = cfg.source.schema or schema_ir.name
            reg.register(name, schema_name, [t.name for t in schema_ir.tables])
            ext = reg.get(name)
            if ext is None:
                # Explicit raise instead of ``assert`` so the check survives -O.
                raise Any2HeliosError(
                    "extract '{}' missing from registry after registration".format(name))
            trail = Trail(_trail_dir(cfg, name))

            if cfg.source.dialect is SourceDialect.MYSQL:
                # Log-based capture: real I/U/D from the binlog. Cursor is the
                # binlog coordinate, persisted in a small pos file alongside
                # the trail.
                from .sources.mysql_binlog import MySqlBinlogSource

                posf = _binlog_pos_file(cfg, name)
                since = _load_binlog_pos(posf)
                source = MySqlBinlogSource(cfg.source.to_dsn(), schema_name, schema_ir.tables)
                records, new_pos = source.capture(since)
                captured = trail.append(records)
                # Advance the cursor only after the records are in the trail.
                _store_binlog_pos(posf, new_pos)
                return {"captured": captured, "watermark": new_pos,
                        "since": since or "(current)", "skipped": [], "mode": "binlog"}

            # Default: Oracle SCN-watermark capture.
            source = OracleScnSource(adapter, schema_name, schema_ir.tables)
            records, new_watermark, skipped = source.capture(ext.watermark)
            captured = trail.append(records)
            reg.set_watermark(name, new_watermark)
            return {"captured": captured, "watermark": new_watermark,
                    "since": ext.watermark, "skipped": skipped, "mode": "scn"}
        finally:
            adapter.close()
    finally:
        # Runs even when adapter.close() itself raises.
        reg.close()
87
+
88
+
89
def run_replicat(cfg, name: str, reconcile_deletes: bool = True) -> Dict[str, object]:  # type: ignore[no-untyped-def]
    """Apply pending trail records of extract *name* to the target.

    Reads the trail from the extract's stored apply cursor, applies the
    records through a ``Replicat``, stores the new cursor, and optionally
    reconciles deletes against the source.

    Args:
        cfg: Loaded tool configuration.
        name: Name of a previously registered extract.
        reconcile_deletes: When true, run the replicat's delete
            reconciliation against the source after applying.

    Returns:
        A dict with keys ``applied``, ``deleted``, ``cursor``, ``read`` and
        ``warnings``.

    Raises:
        Any2HeliosError: if the extract is unknown, or the target is a
            HeliosDB-Nano whose version is undetectable or older than the
            minimum required for CDC apply.
    """
    from ..config.store import build_source_adapter, build_target_driver
    from ..constants import Edition

    reg = CdcRegistry(_registry_path(cfg))
    try:
        ext = reg.get(name)
        if ext is None:
            raise Any2HeliosError("no such extract '{}'; run `a2h extract {}` first".format(name, name))
        # Keep the source open: it supplies the apply-side schema (PKs/columns) and,
        # for delete reconciliation, the current key set.
        adapter = build_source_adapter(cfg)
        adapter.connect()
        try:
            # Build/connect the target inside the adapter guard so a target
            # failure cannot leak the already-open source connection.
            target = build_target_driver(cfg)
            target.connect()
            try:
                # Gate the apply on a live capability probe: refuse editions whose
                # keyed upsert can't run, with a clear message instead of a cryptic
                # mid-apply SQL error.
                caps = target.probe_capabilities()
                if caps.edition is Edition.NANO:
                    ver = _version_tuple(caps.server_version)
                    if ver is None or ver < _NANO_MIN_CDC_VERSION:
                        raise Any2HeliosError(
                            "CDC apply (replicat) on HeliosDB-Nano requires >= {}: before "
                            "that, INSERT ... ON CONFLICT DO UPDATE couldn't resolve a quoted "
                            "SET target and silently corrupted keyed upserts (#34). Detected "
                            "Nano version {!r}. Upgrade Nano, or use `a2h migrate` for a "
                            "one-shot load.".format(
                                ".".join(map(str, _NANO_MIN_CDC_VERSION)),
                                caps.server_version or "unknown"))
                schema_ir = adapter.introspect_schema(ext.schema)
                rep = Replicat(target, schema_ir, cfg.options.preserve_case)
                records, new_cursor = Trail(_trail_dir(cfg, name)).read(ext.apply_cursor)
                applied, warnings = rep.apply(records)
                reg.set_apply_cursor(name, new_cursor)
                deleted = 0
                if reconcile_deletes:
                    deleted, dwarn = rep.reconcile_deletes(adapter)
                    warnings = warnings + dwarn
                return {"applied": applied, "deleted": deleted, "cursor": new_cursor,
                        "read": len(records), "warnings": warnings}
            finally:
                target.close()
        finally:
            # Runs even when target.close() itself raises.
            adapter.close()
    finally:
        reg.close()
136
+
137
+
138
def list_extracts(cfg) -> List[Extract]:  # type: ignore[no-untyped-def]
    """Return every extract recorded in the CDC registry for *cfg*.

    The registry is opened for the duration of the call and always closed.
    """
    registry = CdcRegistry(_registry_path(cfg))
    try:
        extracts = registry.list()
    finally:
        registry.close()
    return extracts
@@ -0,0 +1,76 @@
1
+ """Persistent catalog of named CDC extracts (sqlite).
2
+
3
+ Each extract row carries its capture **watermark** (highest SCN captured) and
4
+ the replicat **apply cursor** (trail lines already applied), so capture and
5
+ apply advance independently and survive process restarts.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import sqlite3
11
+ from dataclasses import dataclass
12
+ from typing import List, Optional
13
+
14
+
15
@dataclass
class Extract:
    """One named CDC extract as stored in the registry."""

    # Unique extract name (primary key of the registry table).
    name: str
    # Source schema the extract captures from.
    schema: str
    # Table names covered by the extract (stored comma-joined in the registry).
    tables: List[str]
    # Capture watermark: highest SCN captured so far.
    watermark: int
    # Replicat apply cursor: trail lines already applied.
    apply_cursor: int
    # Lifecycle state; the registry writes 'registered', 'capturing' or 'applying'.
    state: str
23
+
24
+
25
class CdcRegistry:
    """SQLite-backed catalog of named CDC extracts.

    Each row holds the extract's schema, its table list (comma-joined), the
    capture watermark and the replicat apply cursor. Every mutating method
    commits immediately. The registry can be used as a context manager, which
    closes the connection on exit.
    """

    def __init__(self, path: str) -> None:
        """Open (creating if needed) the registry database at *path*."""
        os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
        self._db = sqlite3.connect(path)
        try:
            self._db.execute("PRAGMA journal_mode=WAL")
            self._db.execute(
                "CREATE TABLE IF NOT EXISTS extracts ("
                " name TEXT PRIMARY KEY,"
                " schema TEXT,"
                " tables_csv TEXT,"
                " watermark INTEGER NOT NULL DEFAULT 0,"
                " apply_cursor INTEGER NOT NULL DEFAULT 0,"
                " state TEXT NOT NULL DEFAULT 'registered')"
            )
            self._db.commit()
        except sqlite3.Error:
            # Don't leak the connection when schema bootstrap fails.
            self._db.close()
            raise

    def __enter__(self) -> "CdcRegistry":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    @staticmethod
    def _row_to_extract(row) -> Extract:
        """Map a (name, schema, tables_csv, watermark, apply_cursor, state) row."""
        tables = [t for t in (row[2] or "").split(",") if t]
        return Extract(row[0], row[1], tables, int(row[3]), int(row[4]), row[5])

    def register(self, name: str, schema: str, tables: List[str]) -> None:
        """Create the extract if absent; refresh its schema and table set if it exists.

        Watermark, apply cursor and state of an existing extract are kept.
        """
        # NOTE(review): table names are comma-joined, so a name containing a
        # comma would not round-trip through get()/list().
        self._db.execute(
            "INSERT INTO extracts (name, schema, tables_csv) VALUES (?,?,?) "
            "ON CONFLICT(name) DO UPDATE SET schema=excluded.schema, tables_csv=excluded.tables_csv",
            (name, schema, ",".join(tables)),
        )
        self._db.commit()

    def get(self, name: str) -> Optional[Extract]:
        """Return the extract called *name*, or ``None`` if it is not registered."""
        row = self._db.execute(
            "SELECT name, schema, tables_csv, watermark, apply_cursor, state "
            "FROM extracts WHERE name=?", (name,)
        ).fetchone()
        if not row:
            return None
        return self._row_to_extract(row)

    def list(self) -> List[Extract]:
        """Return all registered extracts, ordered by name."""
        rows = self._db.execute(
            "SELECT name, schema, tables_csv, watermark, apply_cursor, state "
            "FROM extracts ORDER BY name").fetchall()
        return [self._row_to_extract(r) for r in rows]

    def set_watermark(self, name: str, scn: int) -> None:
        """Store the capture watermark for *name* and mark it 'capturing'."""
        self._db.execute("UPDATE extracts SET watermark=?, state='capturing' WHERE name=?", (scn, name))
        self._db.commit()

    def set_apply_cursor(self, name: str, cursor: int) -> None:
        """Store the replicat apply cursor for *name* and mark it 'applying'."""
        self._db.execute("UPDATE extracts SET apply_cursor=?, state='applying' WHERE name=?", (cursor, name))
        self._db.commit()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self._db.close()