PyPI - inthub-cli - Versions diffs - 0.1.4__py3-none-any.whl - Mend

inthub-cli 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

inthub/__init__.py +0 -0
inthub/__main__.py +4 -0
inthub/bulk/__init__.py +0 -0
inthub/bulk/csv_state.py +118 -0
inthub/bulk/ddl_converter.py +86 -0
inthub/bulk/gcp_secrets.py +23 -0
inthub/bulk/snowflake_client.py +61 -0
inthub/bulk/sqlserver.py +51 -0
inthub/cli.py +27 -0
inthub/client.py +85 -0
inthub/commands/__init__.py +0 -0
inthub/commands/auth.py +57 -0
inthub/commands/bulk/__init__.py +11 -0
inthub/commands/bulk/manage.py +98 -0
inthub/commands/bulk/progress.py +46 -0
inthub/commands/bulk/src_sqlserver_sink_snowflake/__init__.py +123 -0
inthub/commands/bulk/src_sqlserver_sink_snowflake/connectors.py +134 -0
inthub/commands/bulk/src_sqlserver_sink_snowflake/orchestrator.py +326 -0
inthub/commands/connectors.py +186 -0
inthub/config.py +48 -0
inthub_cli-0.1.4.dist-info/METADATA +17 -0
inthub_cli-0.1.4.dist-info/RECORD +24 -0
inthub_cli-0.1.4.dist-info/WHEEL +4 -0
inthub_cli-0.1.4.dist-info/entry_points.txt +2 -0

inthub/commands/bulk/src_sqlserver_sink_snowflake/__init__.py ADDED Viewed

@@ -0,0 +1,123 @@
+from pathlib import Path
+from typing import Annotated
+import typer
+from ulid import ULID
+from inthub import config
+from inthub.bulk import csv_state
+from inthub.client import InthubClient
+from inthub.commands.bulk.src_sqlserver_sink_snowflake import orchestrator
# Typer sub-application for the bulk "SQL Server source -> Snowflake sink" flow.
# Shell-completion installation options are disabled for this sub-app.
app = typer.Typer(
    add_completion=False,
    help="Create SQL Server source and Snowflake sink connectors in bulk.",
)
def _client() -> InthubClient:
    """Build an ``InthubClient`` from the value returned by ``config.require_token()``."""
    token = config.require_token()
    return InthubClient(token)
@app.callback(invoke_without_command=True)
def src_sqlserver_sink_snowflake(
    ctx: typer.Context,
    csv_file: Annotated[Path | None, typer.Option("--csv", help="Path to input CSV.")] = None,
    sqlserver_source_plugin: Annotated[
        str | None, typer.Option("--sqlserver-source-plugin")
    ] = None,
    snowflake_sink_plugin: Annotated[
        str | None, typer.Option("--snowflake-sink-plugin")
    ] = None,
    tags: Annotated[
        str | None, typer.Option("--tags", help="Comma-separated tags.")
    ] = None,
    gcp_project: Annotated[
        str | None, typer.Option("--gcp-project", help="GCP project ID for Secret Manager.")
    ] = None,
    resume: Annotated[
        str | None, typer.Option("--resume", help="ULID of a previous run to resume.")
    ] = None,
) -> None:
    """Provision SQL Server source and Snowflake sink connectors from a CSV."""
    # NOTE: the docstring is deliberately left as a single line because Typer
    # may surface it as CLI help text; extra details live in comments instead.
    #
    # Two modes:
    #   * fresh run  - all of --csv, --sqlserver-source-plugin,
    #                  --snowflake-sink-plugin, --tags and --gcp-project are
    #                  required; a new ULID is generated, the progress CSV and
    #                  run state are written, and the ULID is echoed.
    #   * --resume   - the options above must be absent; they are restored from
    #                  the state saved under the given ULID.
    # Every validation failure prints to stderr and exits with status 1.

    def _split_tags(raw: str | None) -> list[str]:
        # One parser for both modes so a resumed run gets exactly the same
        # tag list as the run that created the state.
        return [t.strip() for t in (raw or "").split(",") if t.strip()]

    if ctx.invoked_subcommand is not None:
        # A sub-command was requested; it does its own work.
        return

    fresh_run_options = [
        csv_file, sqlserver_source_plugin, snowflake_sink_plugin, tags, gcp_project,
    ]
    if resume and any(opt is not None for opt in fresh_run_options):
        typer.echo(
            "Error: --resume is mutually exclusive with --csv, "
            "--sqlserver-source-plugin, --snowflake-sink-plugin, --tags, --gcp-project.",
            err=True,
        )
        raise typer.Exit(1)

    if resume:
        try:
            state = csv_state.load_state(resume)
        except FileNotFoundError:
            typer.echo(f"Error: state for ULID '{resume}' not found in ~/.inthub/.", err=True)
            raise typer.Exit(1)
        # Relies on csv_state's private path helper, as the original code did.
        progress_path = csv_state._progress_path(resume)
        if not progress_path.exists():
            typer.echo(f"Error: progress file '{progress_path}' not found.", err=True)
            raise typer.Exit(1)
        try:
            source_plugin = state["sqlserver_source_plugin"]
            sink_plugin = state["snowflake_sink_plugin"]
            resolved_gcp_project = state["gcp_project"]
        except KeyError as exc:
            # An incomplete state file should be reported like the other
            # input errors rather than surfacing as a traceback.
            typer.echo(
                f"Error: saved state for ULID '{resume}' is missing {exc}.", err=True
            )
            raise typer.Exit(1)
        tag_list = _split_tags(state.get("tags", ""))
    else:
        missing = [
            name for name, val in [
                ("--csv", csv_file),
                ("--sqlserver-source-plugin", sqlserver_source_plugin),
                ("--snowflake-sink-plugin", snowflake_sink_plugin),
                ("--tags", tags),
                ("--gcp-project", gcp_project),
            ] if val is None
        ]
        if missing:
            typer.echo(f"Error: missing required options: {', '.join(missing)}", err=True)
            raise typer.Exit(1)
        # The asserts only narrow Optional types for static checkers; the
        # `missing` check above already guarantees these values are present.
        assert csv_file is not None
        assert sqlserver_source_plugin is not None
        assert snowflake_sink_plugin is not None
        assert gcp_project is not None
        if not csv_file.exists():
            typer.echo(f"Error: CSV file '{csv_file}' not found.", err=True)
            raise typer.Exit(1)
        try:
            csv_state.validate_columns(csv_file)
        except ValueError as exc:
            typer.echo(f"Error: {exc}", err=True)
            raise typer.Exit(1)
        ulid = str(ULID())
        progress_path = csv_state.init_progress_csv(csv_file, ulid)
        tag_list = _split_tags(tags)
        source_plugin = sqlserver_source_plugin
        sink_plugin = snowflake_sink_plugin
        resolved_gcp_project = gcp_project
        csv_state.save_state(
            ulid,
            gcp_project=resolved_gcp_project,
            sqlserver_source_plugin=source_plugin,
            snowflake_sink_plugin=sink_plugin,
            tags=tags or "",
        )
        # Print the run id so the user can pass it to --resume later.
        typer.echo(ulid)

    orchestrator.run(
        progress_path=progress_path,
        client=_client(),
        company_slug=config.require_company_slug(),
        source_plugin=source_plugin,
        sink_plugin=sink_plugin,
        tags=tag_list,
        gcp_project=resolved_gcp_project,
    )

inthub/commands/bulk/src_sqlserver_sink_snowflake/connectors.py ADDED Viewed

@@ -0,0 +1,134 @@
+import json
+from typing import Any
+from inthub.client import InthubClient
def build_source_config(
    row: dict[str, str],
    cluster_slug: str,
    company_slug: str,
    kcc_bootstrap: str,
) -> str:
    """Return the JSON-encoded config for a row's SQL Server source connector.

    Args:
        row: One input row. Reads ``sqlserver_gcp_secret``,
            ``sqlserver_instance``, ``sqlserver_port``, ``sqlserver_db``,
            ``connector_name`` and ``tables`` (comma-separated table names).
        cluster_slug: Suffix appended to the secret name.
        company_slug: Used in the ``inthub-<company_slug>/`` secret prefix.
        kcc_bootstrap: Value for the schema-history Kafka bootstrap servers.

    Returns:
        The configuration serialized with ``json.dumps``.

    Raises:
        KeyError: If ``row`` lacks one of the keys listed above.
    """
    # Credentials are emitted as "${secrets:<path>:<field>}" placeholders; the
    # closing brace is added per field below.
    secret_ref = (
        f"${{secrets:inthub-{company_slug}/"
        f"{row['sqlserver_gcp_secret']}-{cluster_slug}"
    )
    # Drop blank entries (trailing comma, ", ,") so the include list never
    # contains a bare "dbo." pattern.
    table_names = [t.strip() for t in row["tables"].split(",") if t.strip()]
    cfg: dict[str, Any] = {
        "database.hostname": row["sqlserver_instance"],
        "database.port": row["sqlserver_port"],
        "database.user": f"{secret_ref}:username}}",
        "database.password": f"{secret_ref}:password}}",
        "database.names": row["sqlserver_db"],
        "database.encrypt": False,
        "topic.prefix": row["connector_name"],
        "table.include.list": ", ".join(f"dbo.{t}" for t in table_names),
        "decimal.handling.mode": "string",
        "tombstones.on.delete": False,
        "schema.history.internal.kafka.bootstrap.servers": kcc_bootstrap,
        "schema.history.internal.kafka.topic": f"sh_{row['connector_name']}",
        "data.query.mode": "direct",
        "key.converter": "io.confluent.connect.avro.AvroConverter",
        "key.converter.schema.registry.url": "http://schema-registry.inthub:8081",
        "key.converter.schemas.enable": True,
        "value.converter": "io.confluent.connect.avro.AvroConverter",
        "value.converter.schema.registry.url": "http://schema-registry.inthub:8081",
        "value.converter.schemas.enable": True,
        "snapshot.locking.mode": "none",
        "snapshot.isolation.mode": "read_committed",
        "snapshot.max.threads": 5,
    }
    return json.dumps(cfg)
def build_sink_config(
    row: dict[str, str],
    table: str,
    cluster_slug: str,
    company_slug: str,
) -> str:
    """Return the JSON-encoded config for one table's Snowflake sink connector.

    Reads ``snowflake_gcp_secret``, ``connector_name``, ``sqlserver_db``,
    ``snowflake_account``, ``snowflake_db`` and ``snowflake_wh`` from ``row``.
    The subscribed topic is ``<connector_name>.<sqlserver_db>.dbo.<table>`` and
    the SQL Server database name is reused as the target schema.
    """
    secret_path = f"inthub-{company_slug}/{row['snowflake_gcp_secret']}-{cluster_slug}"

    def placeholder(field: str) -> str:
        # Renders "${secrets:<path>:<field>}".
        return f"${{secrets:{secret_path}:{field}}}"

    source_db = row["sqlserver_db"]
    query = "&".join(
        (
            f"schema={source_db}",
            f"db={row['snowflake_db']}",
            f"warehouse={row['snowflake_wh']}",
            "CLIENT_SESSION_KEEP_ALIVE=TRUE",
            "tracing=WARNING",
        )
    )
    avro_converter = "io.confluent.connect.avro.AvroConverter"
    registry_url = "http://schema-registry.inthub:8081"

    # Key order is kept as-is so the serialized JSON text does not change.
    settings: dict[str, Any] = {
        "topics": f"{row['connector_name']}.{source_db}.dbo.{table}",
        "url": f"jdbc:snowflake://{row['snowflake_account']}?{query}",
        "user": placeholder("username"),
        "password": placeholder("password"),
        "schema": source_db,
        "key.converter": avro_converter,
        "key.converter.schema.registry.url": registry_url,
        "key.converter.schemas.enable": True,
        "value.converter": avro_converter,
        "value.converter.schema.registry.url": registry_url,
        "value.converter.schemas.enable": True,
        "consumer.override.max.poll.records": 1000,
    }
    return json.dumps(settings)
def create_source_connector(
    client: InthubClient,
    row: dict[str, str],
    cluster_slug: str,
    company_slug: str,
    kcc_bootstrap: str,
    kcc_name: str,
    plugin: str,
    tags: list[str],
) -> str:
    """Create the source connector for ``row``, apply it, and return its id.

    Sends ``POST /connectors`` with the generated config, then
    ``POST /connectors/<id>/apply`` targeting branch ``main``. The id is read
    from the ``"id"`` field of the first response body.
    """
    connector_name = f"{row['connector_name']}-{cluster_slug}"
    payload = {
        "name": connector_name,
        "kafkaConnectCluster": kcc_name,
        "plugin": plugin,
        "tags": tags,
        "configs": build_source_config(row, cluster_slug, company_slug, kcc_bootstrap),
        "tasksMax": 1,
    }
    created = client.post("/connectors", json=payload).json()
    connector_id: str = created["id"]

    apply_request = {
        "branch": "main",
        "commitMessage": f"Bulk: create source connector {connector_name}",
        "authorName": "inthub-cli",
    }
    client.post(f"/connectors/{connector_id}/apply", json=apply_request)
    return connector_id
def create_sink_connector(
    client: InthubClient,
    row: dict[str, str],
    table: str,
    cluster_slug: str,
    company_slug: str,
    kcc_name: str,
    plugin: str,
    tags: list[str],
) -> str:
    """Create the sink connector for one table of ``row``, apply it, return its id.

    Sends ``POST /connectors`` with the generated config, then
    ``POST /connectors/<id>/apply`` targeting branch ``main``. The id is read
    from the ``"id"`` field of the first response body.
    """
    # NOTE(review): the name does not include `table`, so every sink created
    # for the same row and cluster gets the same name - confirm whether the
    # API tolerates that or whether the table should be part of the name.
    connector_name = f"{row['connector_name']}-{cluster_slug}"
    payload = {
        "name": connector_name,
        "kafkaConnectCluster": kcc_name,
        "plugin": plugin,
        "tags": tags,
        "configs": build_sink_config(row, table, cluster_slug, company_slug),
        "tasksMax": 1,
    }
    created = client.post("/connectors", json=payload).json()
    connector_id: str = created["id"]

    apply_request = {
        "branch": "main",
        "commitMessage": f"Bulk: create sink connector {connector_name} for table {table}",
        "authorName": "inthub-cli",
    }
    client.post(f"/connectors/{connector_id}/apply", json=apply_request)
    return connector_id

inthub/commands/bulk/src_sqlserver_sink_snowflake/orchestrator.py ADDED Viewed

@@ -0,0 +1,326 @@
+import time
+from pathlib import Path
+from typing import Any
+from rich.console import Console
+from rich.rule import Rule
+from inthub.bulk import csv_state, ddl_converter, gcp_secrets, snowflake_client, sqlserver
+from inthub.client import InthubClient
+from inthub.commands.bulk.src_sqlserver_sink_snowflake import connectors as conn_builder
# Rich consoles: `console` writes to stdout, `err` to stderr.
console = Console()
err = Console(stderr=True)

# Polling budgets in seconds (fed to time.sleep and deadline arithmetic).
_ARGOCD_TIMEOUT = 10 * 60
_ARGOCD_POLL = 10
_CONNECTOR_TIMEOUT = 10 * 60
_CONNECTOR_POLL = 15

# Rich-markup status glyphs appended to per-step output lines.
_OK = "[bold green]✓[/]"
_FAIL = "[bold red]✗[/]"
def _step_ok(n: int, total: int, msg: str) -> None:
    """Print step ``n`` of ``total`` with its message and the success glyph."""
    counter = f"[dim][{n}/{total}][/]"
    console.print(f"  {counter} {msg}  {_OK}")
def _step_fail(n: int, total: int, msg: str, reason: str) -> None:
    """Print step ``n`` of ``total`` with the failure glyph, then the reason line."""
    counter = f"[dim][{n}/{total}][/]"
    for line in (
        f"  {counter} {msg}  {_FAIL}",
        f"  [bold red]Error:[/] {reason}",
    ):
        console.print(line)
+def _unwrap_list(body: Any) -> list[dict[str, Any]]:
+    if isinstance(body, list):
+        return [item for item in body if isinstance(item, dict)]
+    if isinstance(body, dict):
+        for val in body.values():
+            if isinstance(val, list):
+                return [item for item in val if isinstance(item, dict)]
+    return []
def _resolve_kcc(client: InthubClient, kcc_name: str) -> dict[str, Any]:
    """Return the first cluster from ``GET /kafka-connect-clusters`` named ``kcc_name``.

    An empty dict is returned when no cluster has that name.
    """
    clusters = _unwrap_list(client.get("/kafka-connect-clusters").json())
    return next(
        (cluster for cluster in clusters if cluster.get("name") == kcc_name),
        {},
    )
def _list_connector_names(client: InthubClient) -> set[str]:
    """Return the set of connector names reported by ``GET /connectors``.

    Entries without a ``name`` contribute the empty string.
    """
    names: set[str] = set()
    for connector in _unwrap_list(client.get("/connectors").json()):
        names.add(str(connector.get("name", "")))
    return names
def _list_secret_names(client: InthubClient) -> dict[str, dict[str, Any]]:
    """Return the records from ``GET /secrets`` keyed by their ``name``.

    Entries without a ``name`` are keyed by the empty string; when names
    repeat, the later record wins.
    """
    by_name: dict[str, dict[str, Any]] = {}
    for secret in _unwrap_list(client.get("/secrets").json()):
        by_name[str(secret.get("name", ""))] = secret
    return by_name
def _poll_argocd(
    client: InthubClient,
    resource_name: str,
    timeout: int,
    poll: int,
) -> bool:
    """Poll the ArgoCD status endpoint until the resource is healthy and synced.

    Args:
        client: API client used for ``GET /observability/argocd-status``.
        resource_name: Value sent as the ``name`` query parameter.
        timeout: Overall budget in seconds.
        poll: Pause between attempts in seconds.

    Returns:
        True as soon as a response reports ``argoHealth == "Healthy"`` and
        ``argoSync == "Synced"``; False once the budget is exhausted.
    """
    from urllib.parse import quote

    # Percent-encode the name so characters such as '&', '#', '+' or spaces
    # cannot corrupt the query string. str() keeps non-string ids working as
    # they did with plain f-string interpolation.
    encoded_name = quote(str(resource_name), safe="")
    path = f"/observability/argocd-status?name={encoded_name}"

    # A monotonic clock keeps the deadline immune to wall-clock adjustments
    # (NTP steps, manual changes) while waiting.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        resp = client.get(path).json()
        if resp.get("argoHealth") == "Healthy" and resp.get("argoSync") == "Synced":
            return True
        time.sleep(poll)
    return False
def _ensure_secret(
    client: InthubClient,
    suffixed_name: str,
    existing: dict[str, dict[str, Any]],
) -> str | None:
    """Make sure the named secret exists and is synced; return an error text or None.

    A secret already listed as Healthy/Synced is accepted immediately. A
    missing secret is created via ``POST /secrets``. In every other case the
    ArgoCD status is polled until it converges or the timeout elapses.
    """
    if suffixed_name in existing:
        record = existing[suffixed_name]
        healthy = record.get("argoHealth") == "Healthy"
        if healthy and record.get("argoSync") == "Synced":
            return None
        # Listed but not converged yet: just wait for it below.
    else:
        client.post("/secrets", json={"name": suffixed_name})

    if _poll_argocd(client, suffixed_name, _ARGOCD_TIMEOUT, _ARGOCD_POLL):
        return None
    return f"secret '{suffixed_name}' did not sync within timeout"
def _validate_plugins(client: InthubClient, source_plugin: str, sink_plugin: str) -> str | None:
    """Check both plugins against ``GET /kc-plugins``.

    Returns an error message for the first missing plugin (source is checked
    before sink), or None when both are listed.
    """
    available = {plugin.get("name") for plugin in _unwrap_list(client.get("/kc-plugins").json())}
    for wanted in (source_plugin, sink_plugin):
        if wanted not in available:
            return f"plugin '{wanted}' not found"
    return None
# Number of steps reported in the "[n/total]" progress prefix.
_TOTAL_STEPS = 9


def _close_quietly(conn: Any) -> None:
    """Close ``conn`` if it exposes a callable ``close``; never raise."""
    close = getattr(conn, "close", None)
    if not callable(close):
        return
    try:
        close()
    except Exception:
        # Deliberate best-effort: a failing close must not replace the row
        # status that has already been recorded.
        pass


def process_row(
    row: dict[str, str],
    progress_path: Path,
    client: InthubClient,
    company_slug: str,
    source_plugin: str,
    sink_plugin: str,
    tags: list[str],
    gcp_project: str,
) -> None:
    """Provision one progress-CSV row end to end and record its outcome.

    The row is first marked ``"working"``. Nine steps then run in order:
    resolve the Kafka Connect clusters, fetch credentials from GCP, connect to
    SQL Server and Snowflake, validate tables and provision Snowflake objects,
    check connector names are free, ensure secrets are synced, create the
    source connector, create one sink connector per table, and finally wait
    for ArgoCD convergence and check each table has at least one row.

    The first failing step marks the row ``"error"`` with its reason and ends
    processing; success marks it ``"done"``. Exceptions derived from
    ``Exception`` are caught and recorded rather than propagated. Database
    connections opened along the way are closed before returning.

    Args:
        row: The CSV row being processed.
        progress_path: Progress CSV updated with the row status.
        client: API client used for all control-plane calls.
        company_slug: Passed through to the connector config builders.
        source_plugin: Plugin name for the source connector.
        sink_plugin: Plugin name for the sink connectors.
        tags: Tags attached to every created connector.
        gcp_project: GCP project the credentials are fetched from.
    """
    name = row["connector_name"]
    csv_state.update_row(progress_path, name, "working")

    # Tracked outside the try block so the finally clause can release
    # whichever connections were actually opened.
    ss_conn: Any = None
    sf_conn: Any = None

    def fail(n: int, msg: str, reason: str) -> None:
        csv_state.update_row(progress_path, name, "error", reason)
        _step_fail(n, _TOTAL_STEPS, msg, reason)

    try:
        # ── Step 1: Resolve KCCs ─────────────────────────────────────────
        _msg1 = f"Resolving KCCs [cyan]{row['kcc_source']}[/] / [cyan]{row['kcc_sink']}[/]"
        kcc_src = _resolve_kcc(client, row["kcc_source"])
        if not kcc_src:
            fail(1, _msg1, f"KCC source '{row['kcc_source']}' not found")
            return
        kcc_snk = _resolve_kcc(client, row["kcc_sink"])
        if not kcc_snk:
            fail(1, _msg1, f"KCC sink '{row['kcc_sink']}' not found")
            return
        src_cluster_slug: str = str(kcc_src.get("slug", ""))
        kcc_bootstrap: str = str(kcc_src.get("bootstrapServers", ""))
        src_kcc_name: str = str(kcc_src.get("name", row["kcc_source"]))
        snk_cluster_slug: str = str(kcc_snk.get("slug", ""))
        snk_kcc_name: str = str(kcc_snk.get("name", row["kcc_sink"]))
        _step_ok(1, _TOTAL_STEPS, _msg1)

        # ── Step 2: Fetch GCP secrets ────────────────────────────────────
        _msg2 = "Fetching GCP secrets"
        try:
            ss_creds = gcp_secrets.fetch_secret(
                gcp_project, f"{row['sqlserver_gcp_secret']}-{src_cluster_slug}"
            )
        except Exception as exc:
            fail(2, _msg2, str(exc))
            return
        try:
            sf_creds = gcp_secrets.fetch_secret(
                gcp_project, f"{row['snowflake_gcp_secret']}-{snk_cluster_slug}"
            )
        except Exception as exc:
            fail(2, _msg2, str(exc))
            return
        _step_ok(2, _TOTAL_STEPS, _msg2)

        # ── Step 3: Connect to SQL Server and Snowflake ──────────────────
        _msg3 = "Connecting to SQL Server and Snowflake"
        try:
            ss_conn = sqlserver.connect(
                row["sqlserver_instance"], int(row["sqlserver_port"]),
                row["sqlserver_db"], ss_creds["username"], ss_creds["password"],
            )
        except Exception as exc:
            fail(3, _msg3, f"SQL Server connection failed: {exc}")
            return
        try:
            sf_conn = snowflake_client.connect(
                row["snowflake_account"], sf_creds["username"], sf_creds["password"],
            )
        except Exception as exc:
            fail(3, _msg3, f"Snowflake connection failed: {exc}")
            return
        _step_ok(3, _TOTAL_STEPS, _msg3)

        # ── Step 4: Validate tables (CDC) and provision Snowflake ────────
        tables = [t.strip() for t in row["tables"].split(",")]
        _msg4 = f"Validating {len(tables)} table(s) and provisioning Snowflake"
        for table in tables:
            if not sqlserver.table_exists(ss_conn, row["sqlserver_db"], table):
                fail(4, _msg4, f"Table '{table}' not found in SQL Server")
                return
            if not sqlserver.cdc_enabled(ss_conn, row["sqlserver_db"], table):
                fail(4, _msg4, f"CDC not enabled on table '{table}'")
                return
            # Snowflake objects are only created when the ingest table is absent.
            if not snowflake_client.ingest_table_exists(
                sf_conn, row["snowflake_db"], row["sqlserver_db"], table
            ):
                columns = sqlserver.get_columns(ss_conn, row["sqlserver_db"], table)
                ddl = ddl_converter.to_create_table_ddl(
                    row["snowflake_db"], row["sqlserver_db"], table, columns
                )
                snowflake_client.create_ingest_table(sf_conn, ddl)
                col_names = [str(c["COLUMN_NAME"]) for c in columns]
                snowflake_client.create_view(
                    sf_conn, row["snowflake_db"], row["sqlserver_db"], table, col_names
                )
                snowflake_client.create_stage(
                    sf_conn, row["snowflake_db"], row["sqlserver_db"], table
                )
        _step_ok(4, _TOTAL_STEPS, _msg4)

        # ── Step 5: Check connectors don't already exist ──────────────────
        src_connector_name = f"{name}-{src_cluster_slug}"
        snk_connector_name = f"{name}-{snk_cluster_slug}"
        _msg5 = (
            f"Checking connector availability "
            f"[cyan]{src_connector_name}[/] / [cyan]{snk_connector_name}[/]"
        )
        existing_connectors = _list_connector_names(client)
        for cname in (src_connector_name, snk_connector_name):
            if cname in existing_connectors:
                fail(5, _msg5, f"connector '{cname}' already exists")
                return
        _step_ok(5, _TOTAL_STEPS, _msg5)

        # ── Step 6: Ensure secrets exist and are synced ──────────────────
        ss_secret_name = f"{row['sqlserver_gcp_secret']}-{src_cluster_slug}"
        sf_secret_name = f"{row['snowflake_gcp_secret']}-{snk_cluster_slug}"
        _msg6 = "Ensuring secrets synced in ArgoCD"
        existing_secrets = _list_secret_names(client)
        for secret_name in (ss_secret_name, sf_secret_name):
            err_msg = _ensure_secret(client, secret_name, existing_secrets)
            if err_msg:
                fail(6, _msg6, err_msg)
                return
        _step_ok(6, _TOTAL_STEPS, _msg6)

        # ── Step 7: Create source connector ──────────────────────────────
        _msg7 = f"Creating source connector [cyan]{src_connector_name}[/]"
        try:
            src_id = conn_builder.create_source_connector(
                client, row, src_cluster_slug, company_slug, kcc_bootstrap, src_kcc_name,
                source_plugin, tags,
            )
        except Exception as exc:
            fail(7, _msg7, f"failed to create source connector: {exc}")
            return
        _step_ok(7, _TOTAL_STEPS, _msg7)

        # ── Step 8: Create sink connectors ───────────────────────────────
        _msg8 = f"Creating {len(tables)} sink connector(s)"
        sink_ids: list[str] = []
        for table in tables:
            try:
                sid = conn_builder.create_sink_connector(
                    client, row, table, snk_cluster_slug, company_slug, snk_kcc_name,
                    sink_plugin, tags,
                )
                sink_ids.append(sid)
            except Exception as exc:
                fail(8, _msg8, f"failed to create sink connector for '{table}': {exc}")
                return
        _step_ok(8, _TOTAL_STEPS, _msg8)

        # ── Step 9: Wait for ArgoCD and verify data ───────────────────────
        _msg9 = "ArgoCD convergence and Snowflake data verified"
        for cid in [src_id, *sink_ids]:
            if not _poll_argocd(client, cid, _CONNECTOR_TIMEOUT, _CONNECTOR_POLL):
                fail(9, _msg9, "ArgoCD convergence timeout")
                return
        for table in tables:
            count = snowflake_client.row_count(
                sf_conn, row["snowflake_db"], row["sqlserver_db"], table
            )
            if count < 1:
                fail(9, _msg9, f"No records in '{table}' after convergence")
                return
        _step_ok(9, _TOTAL_STEPS, _msg9)
        csv_state.update_row(progress_path, name, "done")
        console.print(f"  {_OK} [bold green]{name} completed[/]")
    except Exception as exc:
        csv_state.update_row(progress_path, name, "error", str(exc))
        console.print(f"  {_FAIL} Unexpected error")
        console.print(f"  [bold red]Error:[/] {exc}")
    finally:
        # Runs on success, on every early `return` above and on errors, so
        # connections do not pile up across the rows of a bulk run.
        for conn in (sf_conn, ss_conn):
            if conn is not None:
                _close_quietly(conn)
def run(
    progress_path: Path,
    client: InthubClient,
    company_slug: str,
    source_plugin: str,
    sink_plugin: str,
    tags: list[str],
    gcp_project: str,
) -> None:
    """Process every unfinished row of the progress CSV and print a summary.

    Both plugins are validated first. Rows still marked ``"working"`` are
    reset to ``"pending"``, then every row whose status is not ``"done"`` is
    handed to ``process_row`` in file order.

    Args:
        progress_path: Progress CSV that drives the run and stores row status.
        client: API client used for validation and provisioning.
        company_slug: Passed through to ``process_row``.
        source_plugin: Plugin name for source connectors.
        sink_plugin: Plugin name for sink connectors.
        tags: Tags attached to every created connector.
        gcp_project: GCP project the credentials are fetched from.

    Raises:
        SystemExit: With code 1 when either plugin is not available.
    """
    err_msg = _validate_plugins(client, source_plugin, sink_plugin)
    if err_msg:
        err.print(f"[bold red]Plugin validation failed:[/] {err_msg}")
        raise SystemExit(1)

    # Rows still marked "working" are put back in the queue before processing.
    for row in csv_state.load_rows(progress_path):
        if row.get("status") == "working":
            csv_state.update_row(progress_path, row["connector_name"], "pending")

    # Reload so the statuses below reflect the resets just written.
    rows = csv_state.load_rows(progress_path)
    total = len(rows)
    pending_count = sum(1 for r in rows if r.get("status") != "done")

    for pos, row in enumerate(rows, start=1):
        if row.get("status") == "done":
            continue
        console.print(Rule(
            f"[bold]Row {pos}/{total}: {row['connector_name']}[/]",
            style="cyan",
        ))
        process_row(
            row, progress_path, client, company_slug,
            source_plugin, sink_plugin, tags, gcp_project,
        )
        console.print()

    # One read of the final state serves both counters.
    final_rows = csv_state.load_rows(progress_path)
    done = sum(1 for r in final_rows if r.get("status") == "done")
    errors = sum(1 for r in final_rows if r.get("status") == "error")
    console.print(Rule(style="dim"))
    console.print(
        f"  Finished: [green]{done} done[/]  [red]{errors} error(s)[/]"
        f"  out of {pending_count} processed"
    )